1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/Attr.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/OpenMPClause.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/BitmaskEnum.h"
26 #include "clang/Basic/FileManager.h"
27 #include "clang/Basic/OpenMPKinds.h"
28 #include "clang/Basic/SourceManager.h"
29 #include "clang/CodeGen/ConstantInitBuilder.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SetOperations.h"
32 #include "llvm/ADT/SmallBitVector.h"
33 #include "llvm/ADT/StringExtras.h"
34 #include "llvm/Bitcode/BitcodeReader.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/GlobalValue.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Value.h"
40 #include "llvm/Support/AtomicOrdering.h"
41 #include "llvm/Support/Format.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include <cassert>
44 #include <numeric>
45 
46 using namespace clang;
47 using namespace CodeGen;
48 using namespace llvm::omp;
49 
50 namespace {
51 /// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info for a region with an associated captured statement
  /// \p CS (outlined regions).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info without a captured statement; used by inlined regions,
  /// which reuse the captures of the enclosing region.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit the switch-over-task-parts logic for untied tasks; no-op by default,
  /// overridden by task regions that actually need it.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Return the kind of this OpenMP region (outlined/inlined/target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Return the directive kind (e.g. OMPD_parallel) this region was built for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the region may be exited via a 'cancel' construct.
  bool hasCancel() const { return HasCancel; }

  /// RTTI support: any captured-stmt info created for an OpenMP region.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of this region (see CGOpenMPRegionKind).
  CGOpenMPRegionKind RegionKind;
  /// Callback that emits the body of the region.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive this region corresponds to.
  OpenMPDirectiveKind Kind;
  /// Whether the region contains a 'cancel' construct.
  bool HasCancel;
};
112 
113 /// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable holding the global thread id; must not be
  /// null (asserted below).
  /// \param HelperName Name to use for the outlined capture helper function.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// RTTI support: matches only parallel-outlined region info.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the outlined capture helper function.
  StringRef HelperName;
};
145 
146 /// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the part-id dispatch machinery for untied
  /// tasks: a switch on the stored part id that resumes execution at the
  /// last recorded switching point.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True when the task is untied (note: constructed from !Tied).
    bool Untied;
    /// Parameter holding a pointer to the current task part id.
    const VarDecl *PartIDVar;
    /// Codegen run at each untied switching point before jumping out.
    const RegionCodeGenTy UntiedCodeGen;
    /// The dispatch switch; cases are added lazily, one per task part.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Unknown part ids fall through to the 'done' block and return.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes at the start of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one untied switching point: record the next part id, run the
    /// user-provided codegen, jump out through cleanups, and register the
    /// resume block as a new switch case.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // Store the id of the next part so re-entry resumes after this point.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== switch case count).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Delegate untied switching-point emission to the shared action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// RTTI support: matches only task-outlined region info.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
234 
235 /// API for inlined captured statement code generation in OpenMP
236 /// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI Captured-statement info of the enclosing region (may be
  /// null); most queries below are forwarded to it when it is itself an
  /// OpenMP region.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Forward the context parameter to the enclosing region.
  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  /// Forward the captured 'this' field to the enclosing region, if any.
  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  /// Forward untied switching to the enclosing region (no-op otherwise).
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Return the captured-statement info that was active before this region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  /// RTTI support: matches only inlined region info.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
317 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param HelperName Unique name of the target region, supplied by the
  /// caller.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// RTTI support: matches only target region info.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique name of the target region (used as the helper name).
  StringRef HelperName;
};
346 
/// Placeholder region-codegen callback for CGOpenMPInnerExprInfo; captured
/// expressions are never emitted through it, so reaching it is a bug.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      // Only variable captures need privatization ('this' etc. are skipped).
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only globals need remapping.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    // Delegate to the enclosing region's capture list, if any.
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  // This info is never matched via classof; it exists only transiently while
  // emitting a captured expression.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
408 
409 /// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved state restored by the destructor (only touched if NoInheritance).
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  /// If true, lambda/block capture info is hidden from the inlined region.
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Stash lambda/block capture info so the inlined region doesn't see it.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      // Put the stashed lambda/block capture info back.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
451 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as the generic
  /// implicit-barrier flag).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
480 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device ids passed to the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
506 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
547 
548 /// Schedule types for 'omp for' loops (these enumerators are taken from
549 /// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule is static (unordered, unchunked).
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
579 
580 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
581 /// region.
582 class CleanupTy final : public EHScopeStack::Cleanup {
583   PrePostActionTy *Action;
584 
585 public:
586   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
587   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
588     if (!CGF.HaveInsertPoint())
589       return;
590     Action->Exit(CGF);
591   }
592 };
593 
594 } // anonymous namespace
595 
596 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
597   CodeGenFunction::RunCleanupsScope Scope(CGF);
598   if (PrePostAction) {
599     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
600     Callback(CodeGen, CGF, *PrePostAction);
601   } else {
602     PrePostActionTy Action;
603     Callback(CodeGen, CGF, Action);
604   }
605 }
606 
607 /// Check if the combiner is a call to UDR combiner and if it is so return the
608 /// UDR decl used for reduction.
609 static const OMPDeclareReductionDecl *
610 getReductionInit(const Expr *ReductionOp) {
611   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
612     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
613       if (const auto *DRE =
614               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
615         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
616           return DRD;
617   return nullptr;
618 }
619 
/// Emit initialization of \p Private using either the user-defined reduction
/// initializer from \p DRD (when present) or a zero/null constant of \p Ty.
/// \param InitOp The initializer call expression from the reduction clause.
/// \param Original Address of the original (shared) variable, bound as the
/// RHS of the initializer.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Bind the initializer's LHS/RHS placeholder decls to the private and
    // original addresses, then emit the initializer call with the runtime
    // init function substituted for the opaque callee.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No UDR initializer: materialize a null constant of the element type in
    // a private global and copy it into the private location.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are stored directly; no rvalue load is needed.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
673 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// the user-defined reduction initializer; otherwise \p Init is emitted as a
/// plain initializer expression.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, if initializing for a UDR.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  // The source array is only walked for UDR initialization.
  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name says "dest" although this GEP advances
    // the source pointer; the name is only a debugging hint, not semantics.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
763 
/// Emit the lvalue of the shared variable \p E of a reduction clause by
/// delegating to the CodeGenFunction's shared-variable emission.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
767 
768 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
769                                             const Expr *E) {
770   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
771     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
772   return LValue();
773 }
774 
775 void ReductionCodeGen::emitAggregateInitialization(
776     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
777     const OMPDeclareReductionDecl *DRD) {
778   // Emit VarDecl with copy init for arrays.
779   // Get the address of the original variable captured in current
780   // captured region.
781   const auto *PrivateVD =
782       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
783   bool EmitDeclareReductionInit =
784       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
785   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
786                        EmitDeclareReductionInit,
787                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
788                                                 : PrivateVD->getInit(),
789                        DRD, SharedAddr);
790 }
791 
792 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
793                                    ArrayRef<const Expr *> Origs,
794                                    ArrayRef<const Expr *> Privates,
795                                    ArrayRef<const Expr *> ReductionOps) {
796   ClausesData.reserve(Shareds.size());
797   SharedAddresses.reserve(Shareds.size());
798   Sizes.reserve(Shareds.size());
799   BaseDecls.reserve(Shareds.size());
800   const auto *IOrig = Origs.begin();
801   const auto *IPriv = Privates.begin();
802   const auto *IRed = ReductionOps.begin();
803   for (const Expr *Ref : Shareds) {
804     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
805     std::advance(IOrig, 1);
806     std::advance(IPriv, 1);
807     std::advance(IRed, 1);
808   }
809 }
810 
811 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
812   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
813          "Number of generated lvalues must be exactly N.");
814   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
815   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
816   SharedAddresses.emplace_back(First, Second);
817   if (ClausesData[N].Shared == ClausesData[N].Ref) {
818     OrigAddresses.emplace_back(First, Second);
819   } else {
820     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
821     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
822     OrigAddresses.emplace_back(First, Second);
823   }
824 }
825 
/// Compute and record the size (in chars and in elements) of reduction item
/// \p N, and emit its variably-modified private type if needed.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  // Fixed-size type: the size is a property of the type itself; no runtime
  // element count is needed (second member stays null).
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count of the section = (UB - LB) + 1, computed as a pointer
    // difference between the recorded upper- and lower-bound addresses.
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Not a section: take the full type size and derive the element count by
    // exact division with the element size.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count so that
  // emitting the variably-modified type below picks it up.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
859 
/// Emit the variably-modified private type of reduction item \p N using a
/// caller-provided element count \p Size.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  // Non-VLA types carry no runtime size; the caller must not provide one.
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the provided element count to the VLA size expression and emit the
  // variably-modified type with it.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
876 
/// Emit the initializer of the private copy of reduction item \p N, choosing
/// between aggregate init, a declare-reduction initializer, or the private
/// variable's own initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array-typed item: initialize element-wise via the aggregate path.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar item with a user-defined reduction whose initializer applies
    // (explicit initializer, or private copy without its own init).
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit did not handle it (returned false): fall back to the
    // private copy's own non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
901 
902 bool ReductionCodeGen::needCleanups(unsigned N) {
903   QualType PrivateType = getPrivateType(N);
904   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
905   return DTorKind != QualType::DK_none;
906 }
907 
908 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
909                                     Address PrivateAddr) {
910   QualType PrivateType = getPrivateType(N);
911   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
912   if (needCleanups(N)) {
913     PrivateAddr = CGF.Builder.CreateElementBitCast(
914         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
915     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
916   }
917 }
918 
/// Dereference pointer/reference levels of \p BaseLV (of type \p BaseTy)
/// until the type matches \p ElTy, then return an lvalue over the resulting
/// address reinterpreted as \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Load through one level of pointer or reference indirection.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Reinterpret the final address as ElTy's memory type while preserving the
  // lvalue's base info and TBAA metadata.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
938 
/// Make \p Addr reachable through an address of the (possibly indirect) base
/// type \p BaseTy: for each pointer/reference level a memory temporary is
/// created and chained, and \p Addr is stored into the innermost one.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One temporary per indirection level; link it into the previous one.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp; // Remember the outermost temporary to return.
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    // Store Addr (cast to the innermost temporary's element type) into the
    // deepest temporary, and hand back the outermost one.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  // No indirection levels: just reinterpret Addr with the original base
  // address's pointer type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr);
}
967 
968 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
969   const VarDecl *OrigVD = nullptr;
970   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
971     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
972     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
973       Base = TempOASE->getBase()->IgnoreParenImpCasts();
974     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
975       Base = TempASE->getBase()->IgnoreParenImpCasts();
976     DE = cast<DeclRefExpr>(Base);
977     OrigVD = cast<VarDecl>(DE->getDecl());
978   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
979     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
980     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
981       Base = TempASE->getBase()->IgnoreParenImpCasts();
982     DE = cast<DeclRefExpr>(Base);
983     OrigVD = cast<VarDecl>(DE->getDecl());
984   }
985   return OrigVD;
986 }
987 
/// Adjust the address of the private copy of reduction item \p N so that it
/// can be accessed through the same base expression as the shared item.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    // Array section/subscript item: the private copy covers only the item,
    // so shift the private pointer by the offset between the original base
    // and the shared item, then re-wrap it as the base type.
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Offset (in elements) from the shared item back to its base.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    // Apply the same offset to the private pointer.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  // Plain variable reference: no adjustment required.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1014 
1015 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1016   const OMPDeclareReductionDecl *DRD =
1017       getReductionInit(ClausesData[N].ReductionOp);
1018   return DRD && DRD->getInitializer();
1019 }
1020 
1021 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1022   return CGF.EmitLoadOfPointerLValue(
1023       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1024       getThreadIDVariable()->getType()->castAs<PointerType>());
1025 }
1026 
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // The region body is therefore emitted inside a terminate scope so an
  // escaping exception cannot unwind out of the structured block.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1041 
1042 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1043     CodeGenFunction &CGF) {
1044   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1045                             getThreadIDVariable()->getType(),
1046                             AlignmentSource::Decl);
1047 }
1048 
1049 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1050                                        QualType FieldTy) {
1051   auto *Field = FieldDecl::Create(
1052       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1053       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1054       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1055   Field->setAccess(AS_public);
1056   DC->addDecl(Field);
1057   return Field;
1058 }
1059 
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // The critical-name type is modeled as [8 x i32].
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  // Populate offload entry information from module-level metadata, if any.
  loadOffloadInfoMetadata();
}
1070 
1071 void CGOpenMPRuntime::clear() {
1072   InternalVars.clear();
1073   // Clean non-target variable declarations possibly used only in debug info.
1074   for (const auto &Data : EmittedNonTargetVariables) {
1075     if (!Data.getValue().pointsToAliveValue())
1076       continue;
1077     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1078     if (!GV)
1079       continue;
1080     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1081       continue;
1082     GV->eraseFromParent();
1083   }
1084 }
1085 
1086 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1087   SmallString<128> Buffer;
1088   llvm::raw_svector_ostream OS(Buffer);
1089   StringRef Sep = FirstSeparator;
1090   for (StringRef Part : Parts) {
1091     OS << Sep << Part;
1092     Sep = Separator;
1093   }
1094   return std::string(OS.str());
1095 }
1096 
/// Emit the combiner or initializer of a '#pragma omp declare reduction' as
/// an internal function taking (Ty *omp_out, Ty *omp_in) and returning void.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *omp_out, Ty *omp_in);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Under optimization, allow this small helper to be inlined.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  // For an initializer (not a combiner), emit the 'out' variable's own
  // non-trivial initializer first, if it has one.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  // Then emit the combiner/initializer expression itself, if provided.
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1151 
/// Emit (once) the combiner and, if present, the initializer functions for a
/// user-defined reduction declaration and cache them in UDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Already emitted for this declaration.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only call-style initializers pass the init expression down; otherwise
    // the callee emits the priv variable's own initializer instead.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    // Remember which UDRs were emitted while generating the current function.
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1177 
1178 std::pair<llvm::Function *, llvm::Function *>
1179 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1180   auto I = UDRMap.find(D);
1181   if (I != UDRMap.end())
1182     return I->second;
1183   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1184   return UDRMap.lookup(D);
1185 }
1186 
1187 namespace {
1188 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1189 // Builder if one is present.
/// RAII helper that pushes a finalization callback on the OpenMPIRBuilder's
/// stack on construction and pops it on destruction. A null builder makes
/// both operations no-ops.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  // Pop the callback pushed in the constructor, if a builder was provided.
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder; // May be null; then this RAII is a no-op.
};
1230 } // namespace
1231 
1232 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1233     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1234     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1235     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1236   assert(ThreadIDVar->getType()->isPointerType() &&
1237          "thread id variable must be of type kmp_int32 *");
1238   CodeGenFunction CGF(CGM, true);
1239   bool HasCancel = false;
1240   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1241     HasCancel = OPD->hasCancel();
1242   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1243     HasCancel = OPD->hasCancel();
1244   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1245     HasCancel = OPSD->hasCancel();
1246   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1247     HasCancel = OPFD->hasCancel();
1248   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1249     HasCancel = OPFD->hasCancel();
1250   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1251     HasCancel = OPFD->hasCancel();
1252   else if (const auto *OPFD =
1253                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1254     HasCancel = OPFD->hasCancel();
1255   else if (const auto *OPFD =
1256                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1257     HasCancel = OPFD->hasCancel();
1258 
1259   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1260   //       parallel region to make cancellation barriers work properly.
1261   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1262   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1263   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1264                                     HasCancel, OutlinedHelperName);
1265   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1266   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1267 }
1268 
1269 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1270     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1271     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1272   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1273   return emitParallelOrTeamsOutlinedFunction(
1274       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1275 }
1276 
1277 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1278     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1279     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1280   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1281   return emitParallelOrTeamsOutlinedFunction(
1282       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1283 }
1284 
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Action used for untied tasks: emits a call to __kmpc_omp_task passing the
  // task descriptor loaded from TaskTVar.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop directives capture under OMPD_taskloop, everything else under
  // OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Check every task-related directive kind that can carry a cancel construct.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // Untied tasks report the number of generated task parts to the caller.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1331 
1332 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1333                              const RecordDecl *RD, const CGRecordLayout &RL,
1334                              ArrayRef<llvm::Constant *> Data) {
1335   llvm::StructType *StructTy = RL.getLLVMType();
1336   unsigned PrevIdx = 0;
1337   ConstantInitBuilder CIBuilder(CGM);
1338   const auto *DI = Data.begin();
1339   for (const FieldDecl *FD : RD->fields()) {
1340     unsigned Idx = RL.getLLVMFieldNo(FD);
1341     // Fill the alignment.
1342     for (unsigned I = PrevIdx; I < Idx; ++I)
1343       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1344     PrevIdx = Idx + 1;
1345     Fields.add(*DI);
1346     ++DI;
1347   }
1348 }
1349 
1350 template <class... As>
1351 static llvm::GlobalVariable *
1352 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1353                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1354                    As &&... Args) {
1355   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1356   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1357   ConstantInitBuilder CIBuilder(CGM);
1358   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1359   buildStructValue(Fields, CGM, RD, RL, Data);
1360   return Fields.finishAndCreateGlobal(
1361       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1362       std::forward<As>(Args)...);
1363 }
1364 
1365 template <typename T>
1366 static void
1367 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1368                                          ArrayRef<llvm::Constant *> Data,
1369                                          T &Parent) {
1370   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1371   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1372   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1373   buildStructValue(Fields, CGM, RD, RL, Data);
1374   Fields.finishAndAddTo(Parent);
1375 }
1376 
1377 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1378                                              bool AtCurrentPoint) {
1379   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1380   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1381 
1382   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1383   if (AtCurrentPoint) {
1384     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1385         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1386   } else {
1387     Elem.second.ServiceInsertPt =
1388         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1389     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1390   }
1391 }
1392 
1393 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1394   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1395   if (Elem.second.ServiceInsertPt) {
1396     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1397     Elem.second.ServiceInsertPt = nullptr;
1398     Ptr->eraseFromParent();
1399   }
1400 }
1401 
1402 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1403                                                   SourceLocation Loc,
1404                                                   SmallString<128> &Buffer) {
1405   llvm::raw_svector_ostream OS(Buffer);
1406   // Build debug location
1407   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1408   OS << ";" << PLoc.getFilename() << ";";
1409   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1410     OS << FD->getQualifiedNameAsString();
1411   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1412   return OS.str();
1413 }
1414 
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  // Without debug info, or with an invalid location, a default source
  // location string suffices; otherwise encode function/file/line/column.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  // Build (or reuse) the ident_t value describing this location with the
  // requested flags.
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}
1438 
/// Returns the OpenMP thread id for the current function, caching it per
/// function where possible. Depending on context the value comes from the
/// OpenMPIRBuilder, from a thread-id argument of an outlined region, from the
/// per-function cache, or from a fresh __kmpc_global_thread_num call.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only reuse the thread-id argument when it is safe w.r.t. exceptions:
      // either no landing pad / C++ exceptions are involved, or both the
      // value and the current insertion point live in the entry block (or the
      // same block), so the load cannot be skipped by EH control flow.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insertion point (near the allocas)
  // so the result dominates all uses; the guard restores the previous
  // insertion point afterwards.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1507 
1508 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1509   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1510   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1511     clearLocThreadIdInsertPt(CGF);
1512     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1513   }
1514   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1515     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1516       UDRMap.erase(D);
1517     FunctionUDRMap.erase(CGF.CurFn);
1518   }
1519   auto I = FunctionUDMMap.find(CGF.CurFn);
1520   if (I != FunctionUDMMap.end()) {
1521     for(const auto *D : I->second)
1522       UDMMap.erase(D);
1523     FunctionUDMMap.erase(I);
1524   }
1525   LastprivateConditionalToTypes.erase(CGF.CurFn);
1526   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1527 }
1528 
1529 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1530   return OMPBuilder.IdentPtr;
1531 }
1532 
1533 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1534   if (!Kmpc_MicroTy) {
1535     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1536     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1537                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1538     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1539   }
1540   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1541 }
1542 
1543 llvm::FunctionCallee
1544 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1545                                              bool IsGPUDistribute) {
1546   assert((IVSize == 32 || IVSize == 64) &&
1547          "IV size is not compatible with the omp runtime");
1548   StringRef Name;
1549   if (IsGPUDistribute)
1550     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1551                                     : "__kmpc_distribute_static_init_4u")
1552                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1553                                     : "__kmpc_distribute_static_init_8u");
1554   else
1555     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1556                                     : "__kmpc_for_static_init_4u")
1557                         : (IVSigned ? "__kmpc_for_static_init_8"
1558                                     : "__kmpc_for_static_init_8u");
1559 
1560   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1561   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1562   llvm::Type *TypeParams[] = {
1563     getIdentTyPointerTy(),                     // loc
1564     CGM.Int32Ty,                               // tid
1565     CGM.Int32Ty,                               // schedtype
1566     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1567     PtrTy,                                     // p_lower
1568     PtrTy,                                     // p_upper
1569     PtrTy,                                     // p_stride
1570     ITy,                                       // incr
1571     ITy                                        // chunk
1572   };
1573   auto *FnTy =
1574       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1575   return CGM.CreateRuntimeFunction(FnTy, Name);
1576 }
1577 
1578 llvm::FunctionCallee
1579 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1580   assert((IVSize == 32 || IVSize == 64) &&
1581          "IV size is not compatible with the omp runtime");
1582   StringRef Name =
1583       IVSize == 32
1584           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1585           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1586   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1587   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1588                                CGM.Int32Ty,           // tid
1589                                CGM.Int32Ty,           // schedtype
1590                                ITy,                   // lower
1591                                ITy,                   // upper
1592                                ITy,                   // stride
1593                                ITy                    // chunk
1594   };
1595   auto *FnTy =
1596       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1597   return CGM.CreateRuntimeFunction(FnTy, Name);
1598 }
1599 
1600 llvm::FunctionCallee
1601 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1602   assert((IVSize == 32 || IVSize == 64) &&
1603          "IV size is not compatible with the omp runtime");
1604   StringRef Name =
1605       IVSize == 32
1606           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1607           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1608   llvm::Type *TypeParams[] = {
1609       getIdentTyPointerTy(), // loc
1610       CGM.Int32Ty,           // tid
1611   };
1612   auto *FnTy =
1613       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1614   return CGM.CreateRuntimeFunction(FnTy, Name);
1615 }
1616 
1617 llvm::FunctionCallee
1618 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1619   assert((IVSize == 32 || IVSize == 64) &&
1620          "IV size is not compatible with the omp runtime");
1621   StringRef Name =
1622       IVSize == 32
1623           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1624           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1625   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1626   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1627   llvm::Type *TypeParams[] = {
1628     getIdentTyPointerTy(),                     // loc
1629     CGM.Int32Ty,                               // tid
1630     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1631     PtrTy,                                     // p_lower
1632     PtrTy,                                     // p_upper
1633     PtrTy                                      // p_stride
1634   };
1635   auto *FnTy =
1636       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1637   return CGM.CreateRuntimeFunction(FnTy, Name);
1638 }
1639 
1640 /// Obtain information that uniquely identifies a target entry. This
1641 /// consists of the file and device IDs as well as line number associated with
1642 /// the relevant entry source location.
1643 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1644                                      unsigned &DeviceID, unsigned &FileID,
1645                                      unsigned &LineNum) {
1646   SourceManager &SM = C.getSourceManager();
1647 
1648   // The loc should be always valid and have a file ID (the user cannot use
1649   // #pragma directives in macros)
1650 
1651   assert(Loc.isValid() && "Source location is expected to be always valid.");
1652 
1653   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1654   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1655 
1656   llvm::sys::fs::UniqueID ID;
1657   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1658     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1659     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1660     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1661       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1662           << PLoc.getFilename() << EC.message();
1663   }
1664 
1665   DeviceID = ID.getDevice();
1666   FileID = ID.getFile();
1667   LineNum = PLoc.getLine();
1668 }
1669 
/// Returns the address of the "_decl_tgt_ref_ptr" indirection pointer used to
/// access a declare-target 'link' variable (or a 'to' variable under
/// unified shared memory); Address::invalid() for everything else.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build "<mangled-name>[_<fileid>]_decl_tgt_ref_ptr". The hex file id is
    // mixed in for internal-linkage variables so the name stays unique across
    // translation units.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    // Reuse the pointer global if a previous call already created it.
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
    llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy);
    if (!Ptr) {
      Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // Only the host initializes the pointer to the variable's address.
      // NOTE(review): on the device no initializer is set here; presumably
      // the offload runtime patches it — confirm before relying on it.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1708 
1709 llvm::Constant *
1710 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1711   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1712          !CGM.getContext().getTargetInfo().isTLSSupported());
1713   // Lookup the entry, lazily creating it if necessary.
1714   std::string Suffix = getName({"cache", ""});
1715   return getOrCreateInternalVariable(
1716       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1717 }
1718 
1719 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1720                                                 const VarDecl *VD,
1721                                                 Address VDAddr,
1722                                                 SourceLocation Loc) {
1723   if (CGM.getLangOpts().OpenMPUseTLS &&
1724       CGM.getContext().getTargetInfo().isTLSSupported())
1725     return VDAddr;
1726 
1727   llvm::Type *VarTy = VDAddr.getElementType();
1728   llvm::Value *Args[] = {
1729       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1730       CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1731       CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1732       getOrCreateThreadPrivateCache(VD)};
1733   return Address(
1734       CGF.EmitRuntimeCall(
1735           OMPBuilder.getOrCreateRuntimeFunction(
1736               CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1737           Args),
1738       CGF.Int8Ty, VDAddr.getAlignment());
1739 }
1740 
1741 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1742     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1743     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1744   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1745   // library.
1746   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1747   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1748                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1749                       OMPLoc);
1750   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1751   // to register constructor/destructor for variable.
1752   llvm::Value *Args[] = {
1753       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1754       Ctor, CopyCtor, Dtor};
1755   CGF.EmitRuntimeCall(
1756       OMPBuilder.getOrCreateRuntimeFunction(
1757           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1758       Args);
1759 }
1760 
/// Emits the runtime registration (and, if needed, the synthesized ctor/dtor
/// helpers) for threadprivate variable \p VD. Returns the module-level init
/// function when one had to be created (i.e. when \p CGF is null), otherwise
/// nullptr. No-op when native TLS is used or \p VD has no definition here.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  // Only emit once per variable definition across the module.
  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD. Signature: void *(void *dst).
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer argument and initialize through it.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the destination pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD. Signature: void (void *dst).
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // The runtime takes non-null function pointers; pass typed null constants
    // for the helpers that were not generated.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: wrap the registration in a module-level init
      // function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration inline into the current function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1881 
/// Emits (and registers as offload entries) the device-side constructor and
/// destructor helpers for a declare-target variable definition. Returns
/// OpenMPIsDevice so device compilations skip emitting the variable's normal
/// host initialization.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do when no device is targeted and we are not the device.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  // 'link' variables (and 'to' under unified shared memory) are accessed via
  // the indirection pointer instead; no ctor/dtor entries needed.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit only once per variable definition.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      // Initialization must happen through an addrspace(0) view of the
      // global; cast if the variable lives in another address space.
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(AddrInAS0, Addr->getValueType(),
                                       CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive even though nothing in the module references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: emit only a placeholder global so the entry table on host
      // and device stay in sync.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      // As for the ctor, destroy through an addrspace(0) view of the global.
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
                                  CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive even though nothing in the module references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: placeholder global keeping the entry tables in sync.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2008 
2009 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2010                                                           QualType VarType,
2011                                                           StringRef Name) {
2012   std::string Suffix = getName({"artificial", ""});
2013   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2014   llvm::GlobalVariable *GAddr =
2015       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2016   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
2017       CGM.getTarget().isTLSSupported()) {
2018     GAddr->setThreadLocal(/*Val=*/true);
2019     return Address(GAddr, GAddr->getValueType(),
2020                    CGM.getContext().getTypeAlignInChars(VarType));
2021   }
2022   std::string CacheSuffix = getName({"cache", ""});
2023   llvm::Value *Args[] = {
2024       emitUpdateLocation(CGF, SourceLocation()),
2025       getThreadID(CGF, SourceLocation()),
2026       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2027       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2028                                 /*isSigned=*/false),
2029       getOrCreateInternalVariable(
2030           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2031   return Address(
2032       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2033           CGF.EmitRuntimeCall(
2034               OMPBuilder.getOrCreateRuntimeFunction(
2035                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2036               Args),
2037           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2038       VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
2039 }
2040 
2041 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2042                                    const RegionCodeGenTy &ThenGen,
2043                                    const RegionCodeGenTy &ElseGen) {
2044   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2045 
2046   // If the condition constant folds and can be elided, try to avoid emitting
2047   // the condition and the dead arm of the if/else.
2048   bool CondConstant;
2049   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2050     if (CondConstant)
2051       ThenGen(CGF);
2052     else
2053       ElseGen(CGF);
2054     return;
2055   }
2056 
2057   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2058   // emit the conditional branch.
2059   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2060   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2061   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2062   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2063 
2064   // Emit the 'then' code.
2065   CGF.EmitBlock(ThenBlock);
2066   ThenGen(CGF);
2067   CGF.EmitBranch(ContBlock);
2068   // Emit the 'else' code if present.
2069   // There is no need to emit line number for unconditional branch.
2070   (void)ApplyDebugLocation::CreateEmpty(CGF);
2071   CGF.EmitBlock(ElseBlock);
2072   ElseGen(CGF);
2073   // There is no need to emit line number for unconditional branch.
2074   (void)ApplyDebugLocation::CreateEmpty(CGF);
2075   CGF.EmitBranch(ContBlock);
2076   // Emit the continuation block for code after the if.
2077   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2078 }
2079 
/// Emits code for an OpenMP 'parallel' construct. The common case is a
/// __kmpc_fork_call(loc, nargs, microtask, ...) that runs \p OutlinedFn on
/// the team; when \p IfCond is present a serialized fallback
/// (__kmpc_serialized_parallel / direct call / __kmpc_end_serialized_parallel)
/// is emitted for the false arm.
/// \param CapturedVars values captured by the region, forwarded to the
///        outlined function after the (gtid, bound_tid) arguments.
/// \param NumThreads not referenced in this body — presumably consumed by an
///        overriding/target implementation; TODO confirm against callers.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  // Bail out if codegen on this path has already been terminated.
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // 'then' arm: the region actually forks onto the team.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // 'else' arm: run the region serialized on the encountering thread.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With no if clause the fork is unconditional; otherwise branch between
  // the forked and serialized versions on the condition.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2151 
2152 // If we're inside an (outlined) parallel region, use the region info's
2153 // thread-ID variable (it is passed in a first argument of the outlined function
2154 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2155 // regular serial code region, get thread ID by calling kmp_int32
2156 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2157 // return the address of that temp.
2158 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2159                                              SourceLocation Loc) {
2160   if (auto *OMPRegionInfo =
2161           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2162     if (OMPRegionInfo->getThreadIDVariable())
2163       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2164 
2165   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2166   QualType Int32Ty =
2167       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2168   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2169   CGF.EmitStoreOfScalar(ThreadID,
2170                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2171 
2172   return ThreadIDTemp;
2173 }
2174 
2175 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2176     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2177   SmallString<256> Buffer;
2178   llvm::raw_svector_ostream Out(Buffer);
2179   Out << Name;
2180   StringRef RuntimeName = Out.str();
2181   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2182   if (Elem.second) {
2183     assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2184            "OMP internal variable has different type than requested");
2185     return &*Elem.second;
2186   }
2187 
2188   return Elem.second = new llvm::GlobalVariable(
2189              CGM.getModule(), Ty, /*IsConstant*/ false,
2190              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2191              Elem.first(), /*InsertBefore=*/nullptr,
2192              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2193 }
2194 
2195 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2196   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2197   std::string Name = getName({Prefix, "var"});
2198   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2199 }
2200 
2201 namespace {
2202 /// Common pre(post)-action for different OpenMP constructs.
2203 class CommonActionTy final : public PrePostActionTy {
2204   llvm::FunctionCallee EnterCallee;
2205   ArrayRef<llvm::Value *> EnterArgs;
2206   llvm::FunctionCallee ExitCallee;
2207   ArrayRef<llvm::Value *> ExitArgs;
2208   bool Conditional;
2209   llvm::BasicBlock *ContBlock = nullptr;
2210 
2211 public:
2212   CommonActionTy(llvm::FunctionCallee EnterCallee,
2213                  ArrayRef<llvm::Value *> EnterArgs,
2214                  llvm::FunctionCallee ExitCallee,
2215                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2216       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2217         ExitArgs(ExitArgs), Conditional(Conditional) {}
2218   void Enter(CodeGenFunction &CGF) override {
2219     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2220     if (Conditional) {
2221       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2222       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2223       ContBlock = CGF.createBasicBlock("omp_if.end");
2224       // Generate the branch (If-stmt)
2225       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2226       CGF.EmitBlock(ThenBlock);
2227     }
2228   }
2229   void Done(CodeGenFunction &CGF) {
2230     // Emit the rest of blocks/branches
2231     CGF.EmitBranch(ContBlock);
2232     CGF.EmitBlock(ContBlock, true);
2233   }
2234   void Exit(CodeGenFunction &CGF) override {
2235     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2236   }
2237 };
2238 } // anonymous namespace
2239 
2240 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2241                                          StringRef CriticalName,
2242                                          const RegionCodeGenTy &CriticalOpGen,
2243                                          SourceLocation Loc, const Expr *Hint) {
2244   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2245   // CriticalOpGen();
2246   // __kmpc_end_critical(ident_t *, gtid, Lock);
2247   // Prepare arguments and build a call to __kmpc_critical
2248   if (!CGF.HaveInsertPoint())
2249     return;
2250   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2251                          getCriticalRegionLock(CriticalName)};
2252   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2253                                                 std::end(Args));
2254   if (Hint) {
2255     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2256         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2257   }
2258   CommonActionTy Action(
2259       OMPBuilder.getOrCreateRuntimeFunction(
2260           CGM.getModule(),
2261           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2262       EnterArgs,
2263       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2264                                             OMPRTL___kmpc_end_critical),
2265       Args);
2266   CriticalOpGen.setAction(Action);
2267   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2268 }
2269 
2270 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2271                                        const RegionCodeGenTy &MasterOpGen,
2272                                        SourceLocation Loc) {
2273   if (!CGF.HaveInsertPoint())
2274     return;
2275   // if(__kmpc_master(ident_t *, gtid)) {
2276   //   MasterOpGen();
2277   //   __kmpc_end_master(ident_t *, gtid);
2278   // }
2279   // Prepare arguments and build a call to __kmpc_master
2280   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2281   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2282                             CGM.getModule(), OMPRTL___kmpc_master),
2283                         Args,
2284                         OMPBuilder.getOrCreateRuntimeFunction(
2285                             CGM.getModule(), OMPRTL___kmpc_end_master),
2286                         Args,
2287                         /*Conditional=*/true);
2288   MasterOpGen.setAction(Action);
2289   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2290   Action.Done(CGF);
2291 }
2292 
2293 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2294                                        const RegionCodeGenTy &MaskedOpGen,
2295                                        SourceLocation Loc, const Expr *Filter) {
2296   if (!CGF.HaveInsertPoint())
2297     return;
2298   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2299   //   MaskedOpGen();
2300   //   __kmpc_end_masked(iden_t *, gtid);
2301   // }
2302   // Prepare arguments and build a call to __kmpc_masked
2303   llvm::Value *FilterVal = Filter
2304                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2305                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2306   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2307                          FilterVal};
2308   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2309                             getThreadID(CGF, Loc)};
2310   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2311                             CGM.getModule(), OMPRTL___kmpc_masked),
2312                         Args,
2313                         OMPBuilder.getOrCreateRuntimeFunction(
2314                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2315                         ArgsEnd,
2316                         /*Conditional=*/true);
2317   MaskedOpGen.setAction(Action);
2318   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2319   Action.Done(CGF);
2320 }
2321 
2322 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2323                                         SourceLocation Loc) {
2324   if (!CGF.HaveInsertPoint())
2325     return;
2326   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2327     OMPBuilder.createTaskyield(CGF.Builder);
2328   } else {
2329     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2330     llvm::Value *Args[] = {
2331         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2332         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2333     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2334                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2335                         Args);
2336   }
2337 
2338   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2339     Region->emitUntiedSwitch(CGF);
2340 }
2341 
2342 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2343                                           const RegionCodeGenTy &TaskgroupOpGen,
2344                                           SourceLocation Loc) {
2345   if (!CGF.HaveInsertPoint())
2346     return;
2347   // __kmpc_taskgroup(ident_t *, gtid);
2348   // TaskgroupOpGen();
2349   // __kmpc_end_taskgroup(ident_t *, gtid);
2350   // Prepare arguments and build a call to __kmpc_taskgroup
2351   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2352   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2353                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2354                         Args,
2355                         OMPBuilder.getOrCreateRuntimeFunction(
2356                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2357                         Args);
2358   TaskgroupOpGen.setAction(Action);
2359   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2360 }
2361 
2362 /// Given an array of pointers to variables, project the address of a
2363 /// given variable.
2364 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2365                                       unsigned Index, const VarDecl *Var) {
2366   // Pull out the pointer to the variable.
2367   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2368   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2369 
2370   llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2371   return Address(
2372       CGF.Builder.CreateBitCast(
2373           Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2374       ElemTy, CGF.getContext().getDeclAlign(Var));
2375 }
2376 
/// Emits the helper used by __kmpc_copyprivate to broadcast values:
///   void copy_func(void *LHSArg, void *RHSArg);
/// Both arguments are really void*[n] arrays of pointers to the n
/// copyprivate variables; element I of LHS is assigned from element I of RHS
/// using AssignmentOps[I].
/// NOTE(review): the caller in emitSingleRegion passes its SrcExprs/DstExprs
/// in that order into the (DestExprs, SrcExprs) parameters here — the naming
/// looks swapped; confirm against the Sema-built copyprivate expressions
/// before "fixing" either side.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  // The helper is internal to this TU and emitted with a fresh
  // CodeGenFunction.
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    // Project the I-th pointer out of each array and perform the
    // clause-specified copy/assignment between the pointees.
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2432 
/// Emits code for an OpenMP 'single' construct, optionally followed by a
/// copyprivate broadcast. Overall shape:
///   int32 did_it = 0;
///   if (__kmpc_single(loc, gtid)) {
///     SingleOpGen();
///     did_it = 1;
///     __kmpc_end_single(loc, gtid);
///   }
///   __kmpc_copyprivate(loc, gtid, buf_size, cpr_list, copy_func, did_it);
/// did_it tells the runtime which thread actually executed the region and
/// therefore owns the values to broadcast.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four copyprivate arrays are parallel: one entry per variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional: only the winning thread of __kmpc_single runs the body.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the guarded region, so only the executing
    // thread sets it)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2519 
2520 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2521                                         const RegionCodeGenTy &OrderedOpGen,
2522                                         SourceLocation Loc, bool IsThreads) {
2523   if (!CGF.HaveInsertPoint())
2524     return;
2525   // __kmpc_ordered(ident_t *, gtid);
2526   // OrderedOpGen();
2527   // __kmpc_end_ordered(ident_t *, gtid);
2528   // Prepare arguments and build a call to __kmpc_ordered
2529   if (IsThreads) {
2530     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2531     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2532                               CGM.getModule(), OMPRTL___kmpc_ordered),
2533                           Args,
2534                           OMPBuilder.getOrCreateRuntimeFunction(
2535                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2536                           Args);
2537     OrderedOpGen.setAction(Action);
2538     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2539     return;
2540   }
2541   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2542 }
2543 
2544 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2545   unsigned Flags;
2546   if (Kind == OMPD_for)
2547     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2548   else if (Kind == OMPD_sections)
2549     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2550   else if (Kind == OMPD_single)
2551     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2552   else if (Kind == OMPD_barrier)
2553     Flags = OMP_IDENT_BARRIER_EXPL;
2554   else
2555     Flags = OMP_IDENT_BARRIER_IMPL;
2556   return Flags;
2557 }
2558 
2559 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2560     CodeGenFunction &CGF, const OMPLoopDirective &S,
2561     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2562   // Check if the loop directive is actually a doacross loop directive. In this
2563   // case choose static, 1 schedule.
2564   if (llvm::any_of(
2565           S.getClausesOfKind<OMPOrderedClause>(),
2566           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2567     ScheduleKind = OMPC_SCHEDULE_static;
2568     // Chunk size is 1 in this case.
2569     llvm::APInt ChunkSize(32, 1);
2570     ChunkExpr = IntegerLiteral::Create(
2571         CGF.getContext(), ChunkSize,
2572         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2573         SourceLocation());
2574   }
2575 }
2576 
/// Emits a barrier for the given directive: either via the OpenMPIRBuilder,
/// or as a call to __kmpc_barrier / __kmpc_cancel_barrier. The cancellable
/// form is used inside a region with 'cancel' (unless \p ForceSimpleCall),
/// and when \p EmitChecks is set the result is tested so a cancelled team
/// exits the construct.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  // The flags encode which construct's (implicit/explicit) barrier this is.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      // The enclosing region can be cancelled, so the barrier must observe a
      // pending cancellation; __kmpc_cancel_barrier returns non-zero then.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        // Branch through cleanups to the construct's cancellation landing pad.
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Plain, non-cancellable barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2626 
2627 /// Map the OpenMP loop schedule to the runtime enumeration.
2628 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2629                                           bool Chunked, bool Ordered) {
2630   switch (ScheduleKind) {
2631   case OMPC_SCHEDULE_static:
2632     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2633                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2634   case OMPC_SCHEDULE_dynamic:
2635     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2636   case OMPC_SCHEDULE_guided:
2637     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2638   case OMPC_SCHEDULE_runtime:
2639     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2640   case OMPC_SCHEDULE_auto:
2641     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2642   case OMPC_SCHEDULE_unknown:
2643     assert(!Chunked && "chunk was specified but schedule kind not known");
2644     return Ordered ? OMP_ord_static : OMP_sch_static;
2645   }
2646   llvm_unreachable("Unexpected runtime schedule");
2647 }
2648 
2649 /// Map the OpenMP distribute schedule to the runtime enumeration.
2650 static OpenMPSchedType
2651 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2652   // only static is allowed for dist_schedule
2653   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2654 }
2655 
2656 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2657                                          bool Chunked) const {
2658   OpenMPSchedType Schedule =
2659       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2660   return Schedule == OMP_sch_static;
2661 }
2662 
2663 bool CGOpenMPRuntime::isStaticNonchunked(
2664     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2665   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2666   return Schedule == OMP_dist_sch_static;
2667 }
2668 
2669 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2670                                       bool Chunked) const {
2671   OpenMPSchedType Schedule =
2672       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2673   return Schedule == OMP_sch_static_chunked;
2674 }
2675 
2676 bool CGOpenMPRuntime::isStaticChunked(
2677     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2678   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2679   return Schedule == OMP_dist_sch_static_chunked;
2680 }
2681 
2682 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2683   OpenMPSchedType Schedule =
2684       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2685   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2686   return Schedule != OMP_sch_static;
2687 }
2688 
2689 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2690                                   OpenMPScheduleClauseModifier M1,
2691                                   OpenMPScheduleClauseModifier M2) {
2692   int Modifier = 0;
2693   switch (M1) {
2694   case OMPC_SCHEDULE_MODIFIER_monotonic:
2695     Modifier = OMP_sch_modifier_monotonic;
2696     break;
2697   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2698     Modifier = OMP_sch_modifier_nonmonotonic;
2699     break;
2700   case OMPC_SCHEDULE_MODIFIER_simd:
2701     if (Schedule == OMP_sch_static_chunked)
2702       Schedule = OMP_sch_static_balanced_chunked;
2703     break;
2704   case OMPC_SCHEDULE_MODIFIER_last:
2705   case OMPC_SCHEDULE_MODIFIER_unknown:
2706     break;
2707   }
2708   switch (M2) {
2709   case OMPC_SCHEDULE_MODIFIER_monotonic:
2710     Modifier = OMP_sch_modifier_monotonic;
2711     break;
2712   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2713     Modifier = OMP_sch_modifier_nonmonotonic;
2714     break;
2715   case OMPC_SCHEDULE_MODIFIER_simd:
2716     if (Schedule == OMP_sch_static_chunked)
2717       Schedule = OMP_sch_static_balanced_chunked;
2718     break;
2719   case OMPC_SCHEDULE_MODIFIER_last:
2720   case OMPC_SCHEDULE_MODIFIER_unknown:
2721     break;
2722   }
2723   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2724   // If the static schedule kind is specified or if the ordered clause is
2725   // specified, and if the nonmonotonic modifier is not specified, the effect is
2726   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2727   // modifier is specified, the effect is as if the nonmonotonic modifier is
2728   // specified.
2729   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2730     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2731           Schedule == OMP_sch_static_balanced_chunked ||
2732           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2733           Schedule == OMP_dist_sch_static_chunked ||
2734           Schedule == OMP_dist_sch_static))
2735       Modifier = OMP_sch_modifier_nonmonotonic;
2736   }
2737   return Schedule | Modifier;
2738 }
2739 
/// Emit the __kmpc_dispatch_init_<4|8>[u] call that initializes a dynamically
/// scheduled (or ordered) worksharing loop.
/// \param ScheduleKind schedule kind and modifiers from the 'schedule' clause.
/// \param IVSize, IVSigned bit width (32/64) and signedness of the iteration
///        variable; selects the matching runtime entry point.
/// \param Ordered true if the 'ordered' clause was specified.
/// \param DispatchValues loop bounds and the optional chunk expression.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  // Map the clause schedule (plus chunkedness/orderedness) onto the runtime
  // schedule encoding.
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules only reach the dispatch path when 'ordered' is present.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
2772 
/// Emit the actual __kmpc_for_static_init_<size> (or distribute variant)
/// call; shared by the worksharing and distribute codegen paths.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Ordered loops never use the static-init path (they go through dispatch),
  // and only static-style runtime schedules are acceptable here.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2821 
2822 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2823                                         SourceLocation Loc,
2824                                         OpenMPDirectiveKind DKind,
2825                                         const OpenMPScheduleTy &ScheduleKind,
2826                                         const StaticRTInput &Values) {
2827   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2828       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2829   assert(isOpenMPWorksharingDirective(DKind) &&
2830          "Expected loop-based or sections-based directive.");
2831   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2832                                              isOpenMPLoopDirective(DKind)
2833                                                  ? OMP_IDENT_WORK_LOOP
2834                                                  : OMP_IDENT_WORK_SECTIONS);
2835   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2836   llvm::FunctionCallee StaticInitFunction =
2837       createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2838   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2839   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2840                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2841 }
2842 
2843 void CGOpenMPRuntime::emitDistributeStaticInit(
2844     CodeGenFunction &CGF, SourceLocation Loc,
2845     OpenMPDistScheduleClauseKind SchedKind,
2846     const CGOpenMPRuntime::StaticRTInput &Values) {
2847   OpenMPSchedType ScheduleNum =
2848       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2849   llvm::Value *UpdatedLocation =
2850       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2851   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2852   llvm::FunctionCallee StaticInitFunction;
2853   bool isGPUDistribute =
2854       CGM.getLangOpts().OpenMPIsDevice &&
2855       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2856   StaticInitFunction = createForStaticInitFunction(
2857       Values.IVSize, Values.IVSigned, isGPUDistribute);
2858 
2859   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2860                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2861                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2862 }
2863 
2864 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2865                                           SourceLocation Loc,
2866                                           OpenMPDirectiveKind DKind) {
2867   if (!CGF.HaveInsertPoint())
2868     return;
2869   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2870   llvm::Value *Args[] = {
2871       emitUpdateLocation(CGF, Loc,
2872                          isOpenMPDistributeDirective(DKind)
2873                              ? OMP_IDENT_WORK_DISTRIBUTE
2874                              : isOpenMPLoopDirective(DKind)
2875                                    ? OMP_IDENT_WORK_LOOP
2876                                    : OMP_IDENT_WORK_SECTIONS),
2877       getThreadID(CGF, Loc)};
2878   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2879   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2880       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2881     CGF.EmitRuntimeCall(
2882         OMPBuilder.getOrCreateRuntimeFunction(
2883             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2884         Args);
2885   else
2886     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2887                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2888                         Args);
2889 }
2890 
2891 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2892                                                  SourceLocation Loc,
2893                                                  unsigned IVSize,
2894                                                  bool IVSigned) {
2895   if (!CGF.HaveInsertPoint())
2896     return;
2897   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2898   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2899   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2900 }
2901 
2902 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2903                                           SourceLocation Loc, unsigned IVSize,
2904                                           bool IVSigned, Address IL,
2905                                           Address LB, Address UB,
2906                                           Address ST) {
2907   // Call __kmpc_dispatch_next(
2908   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2909   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2910   //          kmp_int[32|64] *p_stride);
2911   llvm::Value *Args[] = {
2912       emitUpdateLocation(CGF, Loc),
2913       getThreadID(CGF, Loc),
2914       IL.getPointer(), // &isLastIter
2915       LB.getPointer(), // &Lower
2916       UB.getPointer(), // &Upper
2917       ST.getPointer()  // &Stride
2918   };
2919   llvm::Value *Call =
2920       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2921   return CGF.EmitScalarConversion(
2922       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2923       CGF.getContext().BoolTy, Loc);
2924 }
2925 
2926 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2927                                            llvm::Value *NumThreads,
2928                                            SourceLocation Loc) {
2929   if (!CGF.HaveInsertPoint())
2930     return;
2931   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2932   llvm::Value *Args[] = {
2933       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2934       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2935   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2936                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2937                       Args);
2938 }
2939 
2940 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2941                                          ProcBindKind ProcBind,
2942                                          SourceLocation Loc) {
2943   if (!CGF.HaveInsertPoint())
2944     return;
2945   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2946   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2947   llvm::Value *Args[] = {
2948       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2949       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2950   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2951                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2952                       Args);
2953 }
2954 
2955 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2956                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2957   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2958     OMPBuilder.createFlush(CGF.Builder);
2959   } else {
2960     if (!CGF.HaveInsertPoint())
2961       return;
2962     // Build call void __kmpc_flush(ident_t *loc)
2963     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2964                             CGM.getModule(), OMPRTL___kmpc_flush),
2965                         emitUpdateLocation(CGF, Loc));
2966   }
2967 }
2968 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): these appear to mirror the field order of the kmp_task_t
/// record built elsewhere in this file — confirm before reordering. The
/// taskloop-only fields are appended after the common ones.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2994 
2995 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2996   return OffloadEntriesTargetRegion.empty() &&
2997          OffloadEntriesDeviceGlobalVar.empty();
2998 }
2999 
3000 /// Initialize target region entry.
3001 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3002     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3003                                     StringRef ParentName, unsigned LineNum,
3004                                     unsigned Order) {
3005   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3006                                              "only required for the device "
3007                                              "code generation.");
3008   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3009       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3010                                    OMPTargetRegionEntryTargetRegion);
3011   ++OffloadingEntriesNum;
3012 }
3013 
/// Register a target region entry: on the device, fill in the address/ID of
/// the previously initialized placeholder; on the host, create a new entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      return;
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // On the host, silently ignore duplicate registration of plain target
    // regions (address/ID deliberately not compared, IgnoreAddressId=true).
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3043 
3044 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3045     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3046     bool IgnoreAddressId) const {
3047   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3048   if (PerDevice == OffloadEntriesTargetRegion.end())
3049     return false;
3050   auto PerFile = PerDevice->second.find(FileID);
3051   if (PerFile == PerDevice->second.end())
3052     return false;
3053   auto PerParentName = PerFile->second.find(ParentName);
3054   if (PerParentName == PerFile->second.end())
3055     return false;
3056   auto PerLine = PerParentName->second.find(LineNum);
3057   if (PerLine == PerParentName->second.end())
3058     return false;
3059   // Fail if this entry is already registered.
3060   if (!IgnoreAddressId &&
3061       (PerLine->second.getAddress() || PerLine->second.getID()))
3062     return false;
3063   return true;
3064 }
3065 
3066 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3067     const OffloadTargetRegionEntryInfoActTy &Action) {
3068   // Scan all target region entries and perform the provided action.
3069   for (const auto &D : OffloadEntriesTargetRegion)
3070     for (const auto &F : D.second)
3071       for (const auto &P : F.second)
3072         for (const auto &L : P.second)
3073           Action(D.first, F.first, P.first(), L.first, L.second);
3074 }
3075 
/// Initialize a device global variable entry placeholder (device compilation
/// only); address, size and linkage are filled in at registration time.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // try_emplace: keep an existing entry for this name untouched.
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3086 
3087 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3088     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3089                                      CharUnits VarSize,
3090                                      OMPTargetGlobalVarEntryKind Flags,
3091                                      llvm::GlobalValue::LinkageTypes Linkage) {
3092   if (CGM.getLangOpts().OpenMPIsDevice) {
3093     // This could happen if the device compilation is invoked standalone.
3094     if (!hasDeviceGlobalVarEntryInfo(VarName))
3095       return;
3096     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3097     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3098       if (Entry.getVarSize().isZero()) {
3099         Entry.setVarSize(VarSize);
3100         Entry.setLinkage(Linkage);
3101       }
3102       return;
3103     }
3104     Entry.setVarSize(VarSize);
3105     Entry.setLinkage(Linkage);
3106     Entry.setAddress(Addr);
3107   } else {
3108     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3109       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3110       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3111              "Entry not initialized!");
3112       if (Entry.getVarSize().isZero()) {
3113         Entry.setVarSize(VarSize);
3114         Entry.setLinkage(Linkage);
3115       }
3116       return;
3117     }
3118     OffloadEntriesDeviceGlobalVar.try_emplace(
3119         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3120     ++OffloadingEntriesNum;
3121   }
3122 }
3123 
3124 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3125     actOnDeviceGlobalVarEntriesInfo(
3126         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3127   // Scan all target region entries and perform the provided action.
3128   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3129     Action(E.getKey(), E.getValue());
3130 }
3131 
/// Emit one __tgt_offload_entry descriptor into the "omp_offloading_entries"
/// section so the offload runtime's registration machinery can find it.
/// \param ID region/variable identifier constant; \param Addr the symbol the
/// entry name is taken from; \param Size entry size (0 for functions).
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Field order must match the __tgt_offload_entry struct:
  // { void *addr; char *name; size_t size; int32_t flags; int32_t reserved }.
  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}
3162 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are later visited in registration order, so pre-size the arrays
  // and store each entry at its recorded order index.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a SourceLocation from the (device, file, line) triple by
        // scanning the source manager's file table for a matching unique ID;
        // used only for diagnostics below.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit the actual __tgt_offload_entry descriptors in creation order,
  // diagnosing entries that were initialized but never fully registered.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }

      // Hidden or internal symbols on the device are not externally visible. We
      // should not attempt to register them by creating an offloading entry.
      if (auto *GV = dyn_cast<llvm::GlobalValue>(CE->getAddress()))
        if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
          continue;

      // For globals the ID and the address are the same symbol.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3343 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway context; only the named metadata is
  // read from it.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  // Each operand encodes one entry; operand 0 is the entry kind, the rest
  // match the layout documented in createOffloadEntriesAndInfoMetadata().
  for (llvm::MDNode *MN : MD->operands()) {
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3412 
3413 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3414   if (!KmpRoutineEntryPtrTy) {
3415     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3416     ASTContext &C = CGM.getContext();
3417     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3418     FunctionProtoType::ExtProtoInfo EPI;
3419     KmpRoutineEntryPtrQTy = C.getPointerType(
3420         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3421     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3422   }
3423 }
3424 
3425 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3426   // Make sure the type of the entry is already created. This is the type we
3427   // have to create:
3428   // struct __tgt_offload_entry{
3429   //   void      *addr;       // Pointer to the offload entry info.
3430   //                          // (function or global)
3431   //   char      *name;       // Name of the function or global.
3432   //   size_t     size;       // Size of the entry info (0 if it a function).
3433   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3434   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3435   // };
3436   if (TgtOffloadEntryQTy.isNull()) {
3437     ASTContext &C = CGM.getContext();
3438     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3439     RD->startDefinition();
3440     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3441     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3442     addFieldToRecordDecl(C, RD, C.getSizeType());
3443     addFieldToRecordDecl(
3444         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3445     addFieldToRecordDecl(
3446         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3447     RD->completeDefinition();
3448     RD->addAttr(PackedAttr::CreateImplicit(C));
3449     TgtOffloadEntryQTy = C.getRecordType(RD);
3450   }
3451   return TgtOffloadEntryQTy;
3452 }
3453 
3454 namespace {
3455 struct PrivateHelpersTy {
3456   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3457                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3458       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3459         PrivateElemInit(PrivateElemInit) {}
3460   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3461   const Expr *OriginalRef = nullptr;
3462   const VarDecl *Original = nullptr;
3463   const VarDecl *PrivateCopy = nullptr;
3464   const VarDecl *PrivateElemInit = nullptr;
3465   bool isLocalPrivate() const {
3466     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3467   }
3468 };
3469 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3470 } // anonymous namespace
3471 
3472 static bool isAllocatableDecl(const VarDecl *VD) {
3473   const VarDecl *CVD = VD->getCanonicalDecl();
3474   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3475     return false;
3476   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3477   // Use the default allocation.
3478   return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3479            !AA->getAllocator());
3480 }
3481 
3482 static RecordDecl *
3483 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3484   if (!Privates.empty()) {
3485     ASTContext &C = CGM.getContext();
3486     // Build struct .kmp_privates_t. {
3487     //         /*  private vars  */
3488     //       };
3489     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3490     RD->startDefinition();
3491     for (const auto &Pair : Privates) {
3492       const VarDecl *VD = Pair.second.Original;
3493       QualType Type = VD->getType().getNonReferenceType();
3494       // If the private variable is a local variable with lvalue ref type,
3495       // allocate the pointer instead of the pointee type.
3496       if (Pair.second.isLocalPrivate()) {
3497         if (VD->getType()->isLValueReferenceType())
3498           Type = C.getPointerType(Type);
3499         if (isAllocatableDecl(VD))
3500           Type = C.getPointerType(Type);
3501       }
3502       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3503       if (VD->hasAttrs()) {
3504         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3505              E(VD->getAttrs().end());
3506              I != E; ++I)
3507           FD->addAttr(*I);
3508       }
3509     }
3510     RD->completeDefinition();
3511     return RD;
3512   }
3513   return nullptr;
3514 }
3515 
3516 static RecordDecl *
3517 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3518                          QualType KmpInt32Ty,
3519                          QualType KmpRoutineEntryPointerQTy) {
3520   ASTContext &C = CGM.getContext();
3521   // Build struct kmp_task_t {
3522   //         void *              shareds;
3523   //         kmp_routine_entry_t routine;
3524   //         kmp_int32           part_id;
3525   //         kmp_cmplrdata_t data1;
3526   //         kmp_cmplrdata_t data2;
3527   // For taskloops additional fields:
3528   //         kmp_uint64          lb;
3529   //         kmp_uint64          ub;
3530   //         kmp_int64           st;
3531   //         kmp_int32           liter;
3532   //         void *              reductions;
3533   //       };
3534   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3535   UD->startDefinition();
3536   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3537   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3538   UD->completeDefinition();
3539   QualType KmpCmplrdataTy = C.getRecordType(UD);
3540   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3541   RD->startDefinition();
3542   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3543   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3544   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3545   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3546   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3547   if (isOpenMPTaskLoopDirective(Kind)) {
3548     QualType KmpUInt64Ty =
3549         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3550     QualType KmpInt64Ty =
3551         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3552     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3553     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3554     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3555     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3556     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3557   }
3558   RD->completeDefinition();
3559   return RD;
3560 }
3561 
3562 static RecordDecl *
3563 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3564                                      ArrayRef<PrivateDataTy> Privates) {
3565   ASTContext &C = CGM.getContext();
3566   // Build struct kmp_task_t_with_privates {
3567   //         kmp_task_t task_data;
3568   //         .kmp_privates_t. privates;
3569   //       };
3570   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3571   RD->startDefinition();
3572   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3573   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3574     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3575   RD->completeDefinition();
3576   return RD;
3577 }
3578 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // The proxy takes (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt),
  // matching the kmp_routine_entry_t shape shown in emitKmpRoutineEntryT.
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference the task argument; Base then points at the embedded
  // kmp_task_t (the wrapper's first field).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is forwarded by address (pointer to the field).
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the expected shareds type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record, if present, is the wrapper's second field; pass a
  // null pointer when the task has no privates.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloop entries additionally forward lb, ub, st, liter and the
    // reductions pointer loaded from the task descriptor.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The entry always returns 0, per the \code snippet above.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3693 
/// Emit the task destructor thunk: a function taking
/// (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt) that runs the
/// destructor of every destructible field of the embedded privates record.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Dereference the task argument, then step to the privates record (the
  // second field of kmp_task_t_with_privates).
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for each field that requires non-trivial
  // destruction; trivially-destructible fields are skipped.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3742 
3743 /// Emit a privates mapping function for correct handling of private and
3744 /// firstprivate variables.
3745 /// \code
3746 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3747 /// **noalias priv1,...,  <tyn> **noalias privn) {
3748 ///   *priv1 = &.privates.priv1;
3749 ///   ...;
3750 ///   *privn = &.privates.privn;
3751 /// }
3752 /// \endcode
3753 static llvm::Value *
3754 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3755                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3756                                ArrayRef<PrivateDataTy> Privates) {
3757   ASTContext &C = CGM.getContext();
3758   FunctionArgList Args;
3759   ImplicitParamDecl TaskPrivatesArg(
3760       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3761       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3762       ImplicitParamDecl::Other);
3763   Args.push_back(&TaskPrivatesArg);
3764   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3765   unsigned Counter = 1;
3766   for (const Expr *E : Data.PrivateVars) {
3767     Args.push_back(ImplicitParamDecl::Create(
3768         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3769         C.getPointerType(C.getPointerType(E->getType()))
3770             .withConst()
3771             .withRestrict(),
3772         ImplicitParamDecl::Other));
3773     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3774     PrivateVarsPos[VD] = Counter;
3775     ++Counter;
3776   }
3777   for (const Expr *E : Data.FirstprivateVars) {
3778     Args.push_back(ImplicitParamDecl::Create(
3779         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3780         C.getPointerType(C.getPointerType(E->getType()))
3781             .withConst()
3782             .withRestrict(),
3783         ImplicitParamDecl::Other));
3784     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3785     PrivateVarsPos[VD] = Counter;
3786     ++Counter;
3787   }
3788   for (const Expr *E : Data.LastprivateVars) {
3789     Args.push_back(ImplicitParamDecl::Create(
3790         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3791         C.getPointerType(C.getPointerType(E->getType()))
3792             .withConst()
3793             .withRestrict(),
3794         ImplicitParamDecl::Other));
3795     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3796     PrivateVarsPos[VD] = Counter;
3797     ++Counter;
3798   }
3799   for (const VarDecl *VD : Data.PrivateLocals) {
3800     QualType Ty = VD->getType().getNonReferenceType();
3801     if (VD->getType()->isLValueReferenceType())
3802       Ty = C.getPointerType(Ty);
3803     if (isAllocatableDecl(VD))
3804       Ty = C.getPointerType(Ty);
3805     Args.push_back(ImplicitParamDecl::Create(
3806         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3807         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3808         ImplicitParamDecl::Other));
3809     PrivateVarsPos[VD] = Counter;
3810     ++Counter;
3811   }
3812   const auto &TaskPrivatesMapFnInfo =
3813       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3814   llvm::FunctionType *TaskPrivatesMapTy =
3815       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3816   std::string Name =
3817       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3818   auto *TaskPrivatesMap = llvm::Function::Create(
3819       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3820       &CGM.getModule());
3821   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3822                                     TaskPrivatesMapFnInfo);
3823   if (CGM.getLangOpts().Optimize) {
3824     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3825     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3826     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3827   }
3828   CodeGenFunction CGF(CGM);
3829   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3830                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3831 
3832   // *privi = &.privates.privi;
3833   LValue Base = CGF.EmitLoadOfPointerLValue(
3834       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3835       TaskPrivatesArg.getType()->castAs<PointerType>());
3836   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3837   Counter = 0;
3838   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3839     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3840     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3841     LValue RefLVal =
3842         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3843     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3844         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3845     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3846     ++Counter;
3847   }
3848   CGF.FinishFunction();
3849   return TaskPrivatesMap;
3850 }
3851 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the task's shareds block (may be
/// Address::invalid()).
/// \param TDBase LValue of the kmp_task_t_with_privates instance being
/// initialized.
/// \param ForDup true when emitting inside the task duplication function,
/// false for the initial task emission.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  // Walk the fields of the privates record in lockstep with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the duplication function, only non-trivial constructor inits need to
    // be replayed; everything else was handled at initial task emission.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the source value out of the shareds block, realigned to the
          // original declaration's alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda/block captures: evaluate the original reference directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/record firstprivate: privatize the init helper to the
          // shared source and emit the initializer expression.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: just run the private copy's initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3971 
3972 /// Check if duplication function is required for taskloops.
3973 static bool checkInitIsRequired(CodeGenFunction &CGF,
3974                                 ArrayRef<PrivateDataTy> Privates) {
3975   bool InitRequired = false;
3976   for (const PrivateDataTy &Pair : Privates) {
3977     if (Pair.second.isLocalPrivate())
3978       continue;
3979     const VarDecl *VD = Pair.second.PrivateCopy;
3980     const Expr *Init = VD->getAnyInitializer();
3981     InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3982                                     !CGF.isTrivialInitializer(Init));
3983     if (InitRequired)
3984       break;
3985   }
3986   return InitRequired;
3987 }
3988 
3989 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Arguments: destination task, source task, lastprivate flag.
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // Dereference the destination task argument.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Load task_src->shareds so firstprivate values can be copied from it.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4068 
4069 /// Checks if destructor function is required to be generated.
4070 /// \return true if cleanups are required, false otherwise.
4071 static bool
4072 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4073                          ArrayRef<PrivateDataTy> Privates) {
4074   for (const PrivateDataTy &P : Privates) {
4075     if (P.second.isLocalPrivate())
4076       continue;
4077     QualType Ty = P.second.Original->getType().getNonReferenceType();
4078     if (Ty.isDestructedType())
4079       return true;
4080   }
4081   return false;
4082 }
4083 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII scope: the constructor emits the head of one loop per iterator in
/// \p E (counter init, bounds check, branch into the body), so code emitted
/// while the scope is alive becomes the innermost loop body; the destructor
/// emits the matching latch (counter increment, back-branch) and exit blocks
/// in reverse order. A null \p E makes the scope a no-op.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continuation (loop-header) destinations, indexed in
  // iterator order; targeted by the back-branches in the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  // Per-iterator exit destinations, taken when the counter reaches the
  // upper bound.
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Evaluate all upper bounds before privatizing the iterator/counter
    // variables, so the bound expressions see the original declarations.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Signedness of the comparison follows the counter's declared type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first (reverse of the construction order).
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4159 
/// Returns the base address of \p E and its size in bytes.
///
/// Three forms are handled:
/// - OMPArrayShapingExpr: size = sizeof(pointee element) multiplied by every
///   shaping dimension (converted to size_t).
/// - OMP array section: size = (one-past the upper-bound element address)
///   minus the lower-bound address, computed as a pointer difference in
///   size_t.
/// - Anything else: size = sizeof(E's type).
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    // For a shaping expression the base is already a pointer value.
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      // Dimensions may have arbitrary integer types; normalize to size_t
      // before multiplying.
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // Upper bound address + 1 element gives the end of the section; the
    // byte size is end - begin, computed on integers to avoid pointer
    // arithmetic restrictions.
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
4195 
4196 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4197 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4198   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4199   if (KmpTaskAffinityInfoTy.isNull()) {
4200     RecordDecl *KmpAffinityInfoRD =
4201         C.buildImplicitRecord("kmp_task_affinity_info_t");
4202     KmpAffinityInfoRD->startDefinition();
4203     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4204     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4205     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4206     KmpAffinityInfoRD->completeDefinition();
4207     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4208   }
4209 }
4210 
/// Emits allocation and initialization of the kmp_task_t object for the
/// task-generating directive \p D: collects and lays out privates, builds the
/// kmp_task_t-with-privates record, emits the proxy task entry, calls
/// __kmpc_omp_task_alloc (or __kmpc_omp_target_task_alloc), handles detach
/// and affinity clauses, copies shareds, initializes privates, and wires up
/// destructor and priority fields. Returns the handles callers need.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally record the element-init declaration used to
  // initialize the private copy from the original variable.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Task-local privates: allocatable ones use pointer alignment (they are
  // stored indirectly), the rest use their natural alignment.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Lay out privates from most- to least-aligned; stable so equal-alignment
  // entries keep their source order.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use an
  // extended record, cached separately from the one for plain tasks.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map function is passed as the 4th parameter of the task
  // entry, so take its expected type from there.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // final(expr) with a non-constant condition selects the flag at runtime;
  // otherwise the constant truth value is folded directly into the flags.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // With nowait, use __kmpc_omp_target_task_alloc, which takes an extra
    // device ID argument.
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        // An iterator modifier multiplies the item count by the product of
        // the iterators' upper bounds, known only at runtime.
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime element count: emit a variable-length array of affinity
      // records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time element count: a plain constant-sized local array.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Iterator-modified items use a runtime position counter, seeded with
    // the number of entries already written above.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloop needs a task-duplication callback when there are lastprivates
    // or firstprivates that require initialization in the duplicated task.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4598 
namespace {
/// Dependence kind for RTL. These values are stored into the flags field of
/// kmp_depend_info entries (see emitDependData below).
enum RTLDependenceKindTy {
  DepIn = 0x01,           // 'in' dependence.
  DepInOut = 0x3,         // 'out' and 'inout' share this encoding.
  DepMutexInOutSet = 0x4, // 'mutexinoutset' dependence.
  DepInOutSet = 0x8       // 'inoutset' dependence.
};
/// Fields ids in kmp_depend_info record; order matches the fields added in
/// getDependTypes (base_addr, len, flags).
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4610 
4611 /// Translates internal dependency kind into the runtime kind.
4612 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4613   RTLDependenceKindTy DepKind;
4614   switch (K) {
4615   case OMPC_DEPEND_in:
4616     DepKind = DepIn;
4617     break;
4618   // Out and InOut dependencies must use the same code.
4619   case OMPC_DEPEND_out:
4620   case OMPC_DEPEND_inout:
4621     DepKind = DepInOut;
4622     break;
4623   case OMPC_DEPEND_mutexinoutset:
4624     DepKind = DepMutexInOutSet;
4625     break;
4626   case OMPC_DEPEND_inoutset:
4627     DepKind = DepInOutSet;
4628     break;
4629   case OMPC_DEPEND_source:
4630   case OMPC_DEPEND_sink:
4631   case OMPC_DEPEND_depobj:
4632   case OMPC_DEPEND_unknown:
4633     llvm_unreachable("Unknown task dependence type");
4634   }
4635   return DepKind;
4636 }
4637 
4638 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4639 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4640                            QualType &FlagsTy) {
4641   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4642   if (KmpDependInfoTy.isNull()) {
4643     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4644     KmpDependInfoRD->startDefinition();
4645     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4646     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4647     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4648     KmpDependInfoRD->completeDefinition();
4649     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4650   }
4651 }
4652 
/// Returns the number of dependence entries stored in a depobj and an lvalue
/// for the first entry. A depobj holds a pointer to an array of
/// kmp_depend_info records; the element at index -1 (just before the pointed
/// array) stores the entry count in its base_addr field.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  // Load the kmp_depend_info* stored in the depobj variable.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.Builder.CreateElementBitCast(
          DepobjLVal.getAddress(CGF),
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  // Step one kmp_depend_info element back to reach the header entry.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Base.getAddress(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4678 
/// Fills kmp_depend_info entries in \p DependenciesArray for every dependence
/// expression in \p Data, writing base_addr, len and flags for each.
/// \param Pos either a compile-time counter (unsigned*) advanced in place, or
///        an lvalue holding a runtime index (LValue*) that is loaded,
///        used and incremented per entry. The runtime form is needed when an
///        iterator modifier makes the entry count dynamic.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If the dependence carries an iterator modifier, wrap the per-expression
  // emission in the iterator loop nest; otherwise the scope is a no-op.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Compile-time index: constant GEP into the array.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime index: load the current position and GEP with it.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance whichever form of position counter is in use.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4735 
/// Computes, for each depobj expression in \p Data, the runtime number of
/// kmp_depend_info records stored in that depobj and returns the loaded
/// per-expression counts.
/// The counts are first accumulated into stack temporaries inside the iterator
/// scope (if any) and only loaded after the scope is closed, so the returned
/// values are valid outside the iterator loop nest.
SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      // NumDeps is read from the reserved element preceding the depobj array.
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      // Accumulate into a zero-initialized temp (store 0, load, add) so the
      // result survives past the iterator scope and can be loaded below.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Load the accumulated counts after the iterator scope has been exited.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4773 
/// Copies the kmp_depend_info records referenced by each depobj expression in
/// \p Data into \p DependenciesArray at the position tracked by \p PosLVal,
/// advancing the position by the number of records copied.
/// \param PosLVal Runtime counter (in elements) of the current write position.
void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcopy dependency data: size in bytes = NumDeps * sizeof(record).
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;  (position is counted in elements, hence NumDeps)
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4810 
/// Emits the full dependency array for a set of depend clauses and returns
/// {number of elements, array address} suitable for passing to
/// __kmpc_omp_task_with_deps / __kmpc_omp_wait_deps.
/// Returns {nullptr, invalid} when there are no dependencies at all.
/// The array size is a compile-time constant unless depobj or iterator
/// dependencies are present, in which case it is computed at runtime and the
/// array is emitted as a VLA.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Count only regular, non-iterator dependencies here; depobj and
  // iterator-driven counts are runtime values computed below.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // Total = product of iterator trip counts * number of dep expressions,
      // accumulated per iterator dimension.
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Runtime-sized array: total = static count + depobj count + iterator
    // count, emitted as a VLA via an implicit parameter declaration.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Wrap the runtime size in an OpaqueValueExpr so it can serve as the VLA
    // size expression.
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static size: use a plain constant array temp.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // First fill plain (non-depobj, non-iterator) dependencies using a
  // compile-time position counter.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4934 
/// Emits the backing array for an `omp depobj` construct: heap-allocates
/// (via __kmpc_alloc) NumDeps + 1 kmp_depend_info records, stores the number
/// of records into the reserved first element, fills the rest from
/// \p Dependencies, and returns the address of the first real record
/// (i.e. past the reserved size element).
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Iterator-driven: the element count is the runtime product of the
    // iterator trip counts.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the reserved size element, then scale by the aligned record size.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static count: compute the allocation size from a constant array type
    // with one extra element for the reserved size slot.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill the records starting at index 1 (index 0 is the size element). Use a
  // runtime counter when iterator-driven, a compile-time counter otherwise.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a void* pointing past the reserved size element.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}
5019 
/// Emits the `destroy` clause of an `omp depobj` construct: frees the
/// heap-allocated dependency array via __kmpc_free. The stored depobj pointer
/// points past the reserved size element, so step back one record to recover
/// the original allocation before freeing it.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  // GEP with index -1: the allocation actually starts at the reserved size
  // element that precedes the visible records.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
5046 
/// Emits the `update` clause of an `omp depobj` construct: loops over every
/// kmp_depend_info record stored in the depobj and rewrites its flags field
/// to \p NewDepKind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  // NumDeps is read from the reserved element preceding the record array.
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  // NOTE: the body is entered unconditionally, so at least one record is
  // assumed to exist (the depobj always has >= 1 dependency).
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Begin.withPointer(ElementPHI);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5093 
/// Emits code for an `omp task` directive: sets up the task descriptor via
/// emitTaskInit, emits the dependency array (if any), and then either
/// enqueues the task (__kmpc_omp_task[_with_deps]) or, under a false `if`
/// clause, executes it immediately and serially
/// (__kmpc_omp_task_begin_if0 / proxy entry / __kmpc_omp_task_complete_if0).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // "Then" branch: normal (possibly dependent) task enqueue.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start at part id 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // "Else" branch: `if` clause evaluated to false -> undeferred execution.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5211 
/// Emits code for an `omp taskloop` directive: allocates the task via
/// emitTaskInit, initializes the lb/ub/st and reductions fields of the task
/// descriptor, and calls __kmpc_taskloop. Unlike emitTaskCall, the `if`
/// clause is passed to the runtime as an integer argument rather than
/// emitted as a branch.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    // No `if` clause: always deferred.
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the task's lower bound field from the directive's LB variable.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the task's upper bound field.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the task's stride field.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Scheduling modifier values expected by __kmpc_taskloop's `sched` argument.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5297 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded verbatim to
/// \p RedOpGen (used by atomic-style reduction generators).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays (possible with VLAs).
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy: temporarily remap LHSVar/RHSVar to the current elements so
  // RedOpGen's expressions operate element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5380 
5381 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5382 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5383 /// UDR combiner function.
5384 static void emitReductionCombiner(CodeGenFunction &CGF,
5385                                   const Expr *ReductionOp) {
5386   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5387     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5388       if (const auto *DRE =
5389               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5390         if (const auto *DRD =
5391                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5392           std::pair<llvm::Function *, llvm::Function *> Reduction =
5393               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5394           RValue Func = RValue::get(Reduction.first);
5395           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5396           CGF.EmitIgnoredExpr(ReductionOp);
5397           return;
5398         }
5399   CGF.EmitIgnoredExpr(ReductionOp);
5400 }
5401 
/// Emits the internal reduce_func passed to __kmpc_reduce{_nowait}:
///   void reduction_func(void *LHSArg, void *RHSArg);
/// Both arguments point to arrays of pointers to the reduction items
/// (element type ArgsElemType); the function combines each RHS item into the
/// corresponding LHS item using ReductionOps.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap the LHS/RHS variables to the corresponding slots of the pointer
  // arrays. Idx may run ahead of I because VLA items occupy an extra slot.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The size is stored (as a pointer-sized integer) in the slot following
      // the item; bind it to the VLA size expression before emitting the type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit one combiner per reduction item; whole-array items get an
  // element-wise loop.
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5491 
5492 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5493                                                   const Expr *ReductionOp,
5494                                                   const Expr *PrivateRef,
5495                                                   const DeclRefExpr *LHS,
5496                                                   const DeclRefExpr *RHS) {
5497   if (PrivateRef->getType()->isArrayType()) {
5498     // Emit reduction for array section.
5499     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5500     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5501     EmitOMPAggregateReduction(
5502         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5503         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5504           emitReductionCombiner(CGF, ReductionOp);
5505         });
5506   } else {
5507     // Emit reduction for array subscript or single variable.
5508     emitReductionCombiner(CGF, ReductionOp);
5509   }
5510 }
5511 
/// Emits the full runtime reduction sequence: collects the reduction items
/// into a void* list, emits reduce_func, and generates the
/// __kmpc_reduce{_nowait} switch with a tree-reduction arm (case 1) and an
/// atomic arm (case 2). With Options.SimpleReduction, only the plain
/// combiners are emitted.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination needed: emit the combiners inline.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // VLA items take an extra slot for their size.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn =
      emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
                            Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Non-atomic combiners for case 1; the end-reduce call is attached as the
  // region's exit action below.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // NOTE: the inner 'BO' below intentionally shadows the opcode holder
      // above; the outer BO is only assigned from BORHS further down.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback for non-trivial atomics: store the old value into a
                // temporary, remap VD to it, and re-evaluate the update expr.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5815 
5816 /// Generates unique name for artificial threadprivate variables.
5817 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5818 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5819                                       const Expr *Ref) {
5820   SmallString<256> Buffer;
5821   llvm::raw_svector_ostream Out(Buffer);
5822   const clang::DeclRefExpr *DE;
5823   const VarDecl *D = ::getBaseDecl(Ref, DE);
5824   if (!D)
5825     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5826   D = D->getCanonicalDecl();
5827   std::string Name = CGM.getOpenMPRuntime().getName(
5828       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5829   Out << Prefix << Name << "_"
5830       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5831   return std::string(Out.str());
5832 }
5833 
5834 /// Emits reduction initializer function:
5835 /// \code
5836 /// void @.red_init(void* %arg, void* %orig) {
5837 /// %0 = bitcast void* %arg to <type>*
5838 /// store <type> <init>, <type>* %0
5839 /// ret void
5840 /// }
5841 /// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are restrict-qualified void* (item and original item).
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Load the address of the private reduction item (%arg cast to <type>*).
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.Builder.CreateElementBitCast(
          CGF.GetAddrOfLocalVar(&Param),
          CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If the initializer uses the initializer from a 'declare reduction'
  // construct, emit a pointer to the address of the original reduction item
  // (required by the reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5900 
5901 /// Emits reduction combiner function:
5902 /// \code
5903 /// void @.red_comb(void* %arg0, void* %arg1) {
5904 /// %lhs = bitcast void* %arg0 to <type>*
5905 /// %rhs = bitcast void* %arg1 to <type>*
5906 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5907 /// store <type> %2, <type>* %lhs
5908 /// ret void
5909 /// }
5910 /// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // LHS = in/out reduction item, RHS = incoming item to combine in.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.Builder.CreateElementBitCast(
              CGF.GetAddrOfLocalVar(&ParamInOut),
              CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.Builder.CreateElementBitCast(
            CGF.GetAddrOfLocalVar(&ParamIn),
            CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5978 
5979 /// Emits reduction finalizer function:
5980 /// \code
5981 /// void @.red_fini(void* %arg) {
5982 /// %0 = bitcast void* %arg to <type>*
5983 /// <destroy>(<type>* %0)
5984 /// ret void
5985 /// }
5986 /// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is needed (and none is emitted) if the item has no cleanups.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Load the address of the private reduction item from %arg.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6026 
/// Builds an array of kmp_taskred_input_t descriptors, one per reduction item
/// in \p Data, and registers it with the runtime via __kmpc_taskred_init (or
/// __kmpc_taskred_modifier_init when Data.IsReductionWithTaskMod is set).
/// Returns the taskgroup data pointer produced by the runtime, or nullptr if
/// there are no reduction variables or no insert point.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill in one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // A null fini pointer tells the runtime there is nothing to destroy.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 requests delayed creation of the item (see the delayed
      // creation comment above).
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6155 
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  // is_ws is 1 for worksharing-based reductions, 0 otherwise.
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
6173 
6174 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6175                                               SourceLocation Loc,
6176                                               ReductionCodeGen &RCG,
6177                                               unsigned N) {
6178   auto Sizes = RCG.getSizes(N);
6179   // Emit threadprivate global variable if the type is non-constant
6180   // (Sizes.second = nullptr).
6181   if (Sizes.second) {
6182     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6183                                                      /*isSigned=*/false);
6184     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6185         CGF, CGM.getContext().getSizeType(),
6186         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6187     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6188   }
6189 }
6190 
6191 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6192                                               SourceLocation Loc,
6193                                               llvm::Value *ReductionsPtr,
6194                                               LValue SharedLVal) {
6195   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6196   // *d);
6197   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6198                                                    CGM.IntTy,
6199                                                    /*isSigned=*/true),
6200                          ReductionsPtr,
6201                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6202                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6203   return Address(
6204       CGF.EmitRuntimeCall(
6205           OMPBuilder.getOrCreateRuntimeFunction(
6206               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6207           Args),
6208       CGF.Int8Ty, SharedLVal.getAlignment());
6209 }
6210 
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    // Materialize the depend clause items (if any) into a dependency array.
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    llvm::Value *DepWaitTaskArgs[6];
    if (!Data.Dependences.empty()) {
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      // No noalias dependencies are passed here (count 0, null list).
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
      // is specified.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  // Emit the untied-task resume switch at this task scheduling point, if we
  // are inside an OpenMP region.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
6261 
6262 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6263                                            OpenMPDirectiveKind InnerKind,
6264                                            const RegionCodeGenTy &CodeGen,
6265                                            bool HasCancel) {
6266   if (!CGF.HaveInsertPoint())
6267     return;
6268   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6269                                  InnerKind != OMPD_critical &&
6270                                      InnerKind != OMPD_master &&
6271                                      InnerKind != OMPD_masked);
6272   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6273 }
6274 
namespace {
/// Cancellation kind constants passed as cncl_kind to the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime entry points.
enum RTCancelKind {
  CancelNoreq = 0,     // No cancellation requested.
  CancelParallel = 1,  // Cancel a parallel region.
  CancelLoop = 2,      // Cancel a worksharing loop.
  CancelSections = 3,  // Cancel a sections construct.
  CancelTaskgroup = 4  // Cancel a taskgroup.
};
} // anonymous namespace
6284 
6285 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6286   RTCancelKind CancelKind = CancelNoreq;
6287   if (CancelRegion == OMPD_parallel)
6288     CancelKind = CancelParallel;
6289   else if (CancelRegion == OMPD_for)
6290     CancelKind = CancelLoop;
6291   else if (CancelRegion == OMPD_sections)
6292     CancelKind = CancelSections;
6293   else {
6294     assert(CancelRegion == OMPD_taskgroup);
6295     CancelKind = CancelTaskgroup;
6296   }
6297   return CancelKind;
6298 }
6299 
/// Emits a call to __kmpc_cancellationpoint and, when the runtime reports an
/// active cancellation, branches out of the enclosing construct (emitting a
/// cancel barrier first for parallel regions).
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // Non-zero result means cancellation was requested.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6339 
/// Emits a call to __kmpc_cancel (optionally guarded by the 'if' clause
/// condition) and branches out of the construct when cancellation is active.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Codegen for the actual cancel call plus the conditional exit from the
    // construct.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // Non-zero result means cancellation was requested.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // With an 'if' clause, only perform the cancel when the condition holds.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6385 
6386 namespace {
6387 /// Cleanup action for uses_allocators support.
6388 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6389   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6390 
6391 public:
6392   OMPUsesAllocatorsActionTy(
6393       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6394       : Allocators(Allocators) {}
6395   void Enter(CodeGenFunction &CGF) override {
6396     if (!CGF.HaveInsertPoint())
6397       return;
6398     for (const auto &AllocatorData : Allocators) {
6399       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6400           CGF, AllocatorData.first, AllocatorData.second);
6401     }
6402   }
6403   void Exit(CodeGenFunction &CGF) override {
6404     if (!CGF.HaveInsertPoint())
6405       return;
6406     for (const auto &AllocatorData : Allocators) {
6407       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6408                                                         AllocatorData.first);
6409     }
6410   }
6411 };
6412 } // namespace
6413 
6414 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6415     const OMPExecutableDirective &D, StringRef ParentName,
6416     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6417     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6418   assert(!ParentName.empty() && "Invalid target region parent name!");
6419   HasEmittedTargetRegion = true;
6420   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6421   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6422     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6423       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6424       if (!D.AllocatorTraits)
6425         continue;
6426       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6427     }
6428   }
6429   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6430   CodeGen.setAction(UsesAllocatorAction);
6431   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6432                                    IsOffloadEntry, CodeGen);
6433 }
6434 
/// Emits: <allocator> = __kmpc_init_allocator(gtid, /*memspace=*/nullptr,
/// ntraits, traits) and stores the returned handle into the allocator
/// variable named by \p Allocator.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The number of traits is the constant array bound of the traits expression.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as a void* lvalue so it can be
  // loaded and passed to the runtime.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // The allocator expression refers to a variable declaration; emit the
  // declaration first, then store the runtime handle into it.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6469 
6470 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6471                                              const Expr *Allocator) {
6472   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6473   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6474   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6475   llvm::Value *AllocatorVal =
6476       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6477   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6478                                           CGF.getContext().VoidPtrTy,
6479                                           Allocator->getExprLoc());
6480   (void)CGF.EmitRuntimeCall(
6481       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6482                                             OMPRTL___kmpc_destroy_allocator),
6483       {ThreadId, AllocatorVal});
6484 }
6485 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  // On the host with mandatory offload there is no fallback body to build;
  // only a named stub address is needed (see below).
  const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
                               !CGM.getLangOpts().OpenMPOffloadMandatory;
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  if (BuildOutlinedFn)
    OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // On the host, a unique constant byte serves as the region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // If we do not allow host fallback we still need a named address to use.
  llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
  if (!BuildOutlinedFn) {
    assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&
           "Named kernel already exists?");
    TargetRegionEntryAddr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::InternalLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  if (BuildOutlinedFn)
    CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}
6585 
6586 /// Checks if the expression is constant or does not have non-trivial function
6587 /// calls.
6588 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6589   // We can skip constant expressions.
6590   // We can skip expressions with trivial calls or simple expressions.
6591   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6592           !E->hasNonTrivialCall(Ctx)) &&
6593          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6594 }
6595 
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  // Walk through nested compound statements, skipping statements that are
  // ignorable here (trivial expressions, no-op statements/directives, and
  // certain declarations). Return the single remaining child if exactly one
  // survives at every level; otherwise return nullptr.
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              // These declaration kinds are considered ignorable here.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Globals and unused variables are also ignorable.
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6637 
6638 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6639     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6640     int32_t &DefaultVal) {
6641 
6642   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6643   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6644          "Expected target-based executable directive.");
6645   switch (DirectiveKind) {
6646   case OMPD_target: {
6647     const auto *CS = D.getInnermostCapturedStmt();
6648     const auto *Body =
6649         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6650     const Stmt *ChildStmt =
6651         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6652     if (const auto *NestedDir =
6653             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6654       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6655         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6656           const Expr *NumTeams =
6657               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6658           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6659             if (auto Constant =
6660                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6661               DefaultVal = Constant->getExtValue();
6662           return NumTeams;
6663         }
6664         DefaultVal = 0;
6665         return nullptr;
6666       }
6667       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6668           isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6669         DefaultVal = 1;
6670         return nullptr;
6671       }
6672       DefaultVal = 1;
6673       return nullptr;
6674     }
6675     // A value of -1 is used to check if we need to emit no teams region
6676     DefaultVal = -1;
6677     return nullptr;
6678   }
6679   case OMPD_target_teams:
6680   case OMPD_target_teams_distribute:
6681   case OMPD_target_teams_distribute_simd:
6682   case OMPD_target_teams_distribute_parallel_for:
6683   case OMPD_target_teams_distribute_parallel_for_simd: {
6684     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6685       const Expr *NumTeams =
6686           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6687       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6688         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6689           DefaultVal = Constant->getExtValue();
6690       return NumTeams;
6691     }
6692     DefaultVal = 0;
6693     return nullptr;
6694   }
6695   case OMPD_target_parallel:
6696   case OMPD_target_parallel_for:
6697   case OMPD_target_parallel_for_simd:
6698   case OMPD_target_simd:
6699     DefaultVal = 1;
6700     return nullptr;
6701   case OMPD_parallel:
6702   case OMPD_for:
6703   case OMPD_parallel_for:
6704   case OMPD_parallel_master:
6705   case OMPD_parallel_sections:
6706   case OMPD_for_simd:
6707   case OMPD_parallel_for_simd:
6708   case OMPD_cancel:
6709   case OMPD_cancellation_point:
6710   case OMPD_ordered:
6711   case OMPD_threadprivate:
6712   case OMPD_allocate:
6713   case OMPD_task:
6714   case OMPD_simd:
6715   case OMPD_tile:
6716   case OMPD_unroll:
6717   case OMPD_sections:
6718   case OMPD_section:
6719   case OMPD_single:
6720   case OMPD_master:
6721   case OMPD_critical:
6722   case OMPD_taskyield:
6723   case OMPD_barrier:
6724   case OMPD_taskwait:
6725   case OMPD_taskgroup:
6726   case OMPD_atomic:
6727   case OMPD_flush:
6728   case OMPD_depobj:
6729   case OMPD_scan:
6730   case OMPD_teams:
6731   case OMPD_target_data:
6732   case OMPD_target_exit_data:
6733   case OMPD_target_enter_data:
6734   case OMPD_distribute:
6735   case OMPD_distribute_simd:
6736   case OMPD_distribute_parallel_for:
6737   case OMPD_distribute_parallel_for_simd:
6738   case OMPD_teams_distribute:
6739   case OMPD_teams_distribute_simd:
6740   case OMPD_teams_distribute_parallel_for:
6741   case OMPD_teams_distribute_parallel_for_simd:
6742   case OMPD_target_update:
6743   case OMPD_declare_simd:
6744   case OMPD_declare_variant:
6745   case OMPD_begin_declare_variant:
6746   case OMPD_end_declare_variant:
6747   case OMPD_declare_target:
6748   case OMPD_end_declare_target:
6749   case OMPD_declare_reduction:
6750   case OMPD_declare_mapper:
6751   case OMPD_taskloop:
6752   case OMPD_taskloop_simd:
6753   case OMPD_master_taskloop:
6754   case OMPD_master_taskloop_simd:
6755   case OMPD_parallel_master_taskloop:
6756   case OMPD_parallel_master_taskloop_simd:
6757   case OMPD_requires:
6758   case OMPD_metadirective:
6759   case OMPD_unknown:
6760     break;
6761   default:
6762     break;
6763   }
6764   llvm_unreachable("Unexpected directive kind.");
6765 }
6766 
/// Emit the host-side i32 value for the number of teams of a target
/// directive. Returns null when no teams region needs to be emitted (the
/// DefaultNT == -1 case signalled by getNumTeamsExprForTargetDirective).
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  // Stays -1 unless getNumTeamsExprForTargetDirective() assigns a
  // directive-specific default (or a constant-folded num_teams value).
  int32_t DefaultNT = -1;
  const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      // The num_teams clause sits on a nested teams directive, so its
      // expression must be emitted in the context of the inner captured
      // statement.
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                  /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                             /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      // The clause is on this directive itself; emit it inside a cleanup
      // scope so any temporaries are destroyed right away.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                  /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                             /*isSigned=*/true);
    }
    default:
      break;
    }
  } else if (DefaultNT == -1) {
    // No teams region has to be emitted for this directive.
    return nullptr;
  }

  return Bld.getInt32(DefaultNT);
}
6808 
/// Derive the number of threads for the (single) directive nested inside the
/// captured statement \p CS.
///
/// If the child is a parallel directive, the result combines its if and
/// num_threads clauses: <cond> ? (<numthreads> ? <numthreads> : 0) : 1, where
/// any num_threads value is clamped by \p DefaultThreadLimitVal. A simd child
/// yields 1. Otherwise \p DefaultThreadLimitVal (possibly null) is returned;
/// 0 means "let the runtime decide".
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Only an if clause without a name modifier, or one naming
        // 'parallel', applies here.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: the region runs sequentially.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit the clause's pre-init declarations before evaluating the
            // condition; captured (no-init) vars only get storage+cleanups.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads by the thread limit: min(limit, num_threads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A simd region always executes with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6900 
/// Find the expression (if any) that bounds the number of threads of a target
/// directive, storing a known constant value into \p DefaultVal. Returns the
/// clause expression to emit, or null when only the default applies.
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  switch (DirectiveKind) {
  case OMPD_target:
    // Teams have no clause thread_limit
    return nullptr;
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return ThreadLimit;
    }
    return nullptr;
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Both thread_limit and num_threads can bound these regions; the
    // effective (constant) bound is the smaller of the two.
    Expr *ThreadLimit = nullptr;
    Expr *NumThreads = nullptr;
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      NumThreads = NumThreadsClause->getNumThreads();
      if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
        if (auto Constant =
                NumThreads->getIntegerConstantExpr(CGF.getContext())) {
          // NOTE(review): when no thread_limit clause set DefaultVal above,
          // this comparison reads whatever value the caller initialized it
          // to - confirm all call sites pre-initialize DefaultVal.
          if (Constant->getExtValue() < DefaultVal) {
            DefaultVal = Constant->getExtValue();
            ThreadLimit = NumThreads;
          }
        }
      }
    }
    return ThreadLimit;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd regions run with a single thread.
    DefaultVal = 1;
    return nullptr;
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7022 
/// Emit the host-side i32 number of threads for a target directive, combining
/// thread_limit, num_threads and if clauses found on the directive itself or
/// on the single directive nested inside it. A result of 0 means "use the
/// runtime default".
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target': the relevant clauses can only live on a nested
    // directive, so walk down through the single-child chain.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        // Emit the nested thread_limit expression (and its pre-init decls)
        // in the context of the inner captured statement.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Step into a standalone teams region to inspect its own child.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A non-simd distribute region may itself wrap a parallel region.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // simd executes with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Look one level deeper through a nested 'distribute'.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Only an if clause without a name modifier, or one naming
      // 'parallel', applies here.
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: sequential execution, one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Effective limit is min(num_threads, thread_limit) when both exist.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd executes with a single thread.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7241 
7242 namespace {
7243 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7244 
7245 // Utility to handle information from clauses associated with a given
7246 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7247 // It provides a convenient interface to obtain the information and generate
7248 // code for that information.
7249 class MappableExprsHandler {
7250 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC (no enumerator defined).
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Increment and decrement a separate reference counter so that the data
    /// cannot be unmapped within the associated region.  Thus, this flag is
    /// intended to be used on 'target' and 'target data' directives because
    /// they are inherently structured.  It is not intended to be used on
    /// 'target enter data' and 'target exit data' directives because they are
    /// inherently dynamic.
    /// This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7306 
7307   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7308   static unsigned getFlagMemberOffset() {
7309     unsigned Offset = 0;
7310     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7311          Remain = Remain >> 1)
7312       Offset++;
7313     return Offset;
7314   }
7315 
7316   /// Class that holds debugging information for a data mapping to be passed to
7317   /// the runtime library.
7318   class MappingExprInfo {
7319     /// The variable declaration used for the data mapping.
7320     const ValueDecl *MapDecl = nullptr;
7321     /// The original expression used in the map clause, or null if there is
7322     /// none.
7323     const Expr *MapExpr = nullptr;
7324 
7325   public:
7326     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7327         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7328 
7329     const ValueDecl *getMapDecl() const { return MapDecl; }
7330     const Expr *getMapExpr() const { return MapExpr; }
7331   };
7332 
7333   /// Class that associates information with a base pointer to be passed to the
7334   /// runtime library.
7335   class BasePointerInfo {
7336     /// The base pointer.
7337     llvm::Value *Ptr = nullptr;
7338     /// The base declaration that refers to this device pointer, or null if
7339     /// there is none.
7340     const ValueDecl *DevPtrDecl = nullptr;
7341 
7342   public:
7343     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7344         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7345     llvm::Value *operator*() const { return Ptr; }
7346     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7347     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7348   };
7349 
  // Convenience aliases for the parallel arrays that together describe the
  // generated map entries (expressions, base pointers, pointers, sizes,
  // flags, mappers and non-contiguous dimension data).
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7357 
  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  struct MapCombinedInfoTy {
    /// Extra per-entry data used for non-contiguous list items.
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    // The arrays below are parallel: index i of each array describes the
    // same map entry.
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                 CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                    CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                   CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                    CurInfo.NonContigInfo.Strides.end());
    }
  };
7396 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Map information gathered while processing the struct's members
    // (kept separately from the combined entry for the struct itself).
    MapCombinedInfoTy PreliminaryMapData;
    // Lowest mapped member: {field index, address of the element}.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Highest mapped member: {field index, address of the element}.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Base address of the whole struct.
    Address Base = Address::invalid();
    // Lower-bound address for the struct's combined entry.
    Address LB = Address::invalid();
    // True if a mapped element is an array section.
    bool IsArraySection = false;
    // True when the complete record (not just a member range) is mapped.
    bool HasCompleteRecord = false;
  };
7412 
7413 private:
  /// Information gathered for one mappable expression: its component list
  /// plus the map type, modifiers, and related attributes needed to generate
  /// the corresponding map entries.
  struct MapInfo {
    // Components of the mappable expression this info was built from.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    // Kind of the map clause (to/from/tofrom/alloc/release/delete).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    // Map-type modifiers (always, close, present, ...).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    // Motion modifiers from to/from clauses (e.g. present).
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    // Whether a device pointer has to be returned for this entry.
    bool ReturnDevicePointer = false;
    // True if the mapping is implicit rather than written by the user.
    bool IsImplicit = false;
    // User-defined mapper associated with this map, if any.
    const ValueDecl *Mapper = nullptr;
    // The variable-reference expression the map refers to, if available.
    const Expr *VarRef = nullptr;
    // True when the entry originates from use_device_addr handling
    // (as opposed to use_device_ptr).
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7440 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // Expression whose address/pointer is to be emitted for the entry.
    const Expr *IE = nullptr;
    // Declaration the deferred entry is generated for.
    const ValueDecl *VD = nullptr;
    // True when the entry comes from use_device_addr, false for
    // use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7453 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7477 
7478   llvm::Value *getExprTypeSize(const Expr *E) const {
7479     QualType ExprTy = E->getType().getCanonicalType();
7480 
7481     // Calculate the size for array shaping expression.
7482     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7483       llvm::Value *Size =
7484           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7485       for (const Expr *SE : OAE->getDimensions()) {
7486         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7487         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7488                                       CGF.getContext().getSizeType(),
7489                                       SE->getExprLoc());
7490         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7491       }
7492       return Size;
7493     }
7494 
7495     // Reference types are ignored for mapping purposes.
7496     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7497       ExprTy = RefTy->getPointeeType().getCanonicalType();
7498 
7499     // Given that an array section is considered a built-in type, we need to
7500     // do the calculation based on the length of the section instead of relying
7501     // on CGF.getTypeSize(E->getType()).
7502     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7503       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7504                             OAE->getBase()->IgnoreParenImpCasts())
7505                             .getCanonicalType();
7506 
7507       // If there is no length associated with the expression and lower bound is
7508       // not specified too, that means we are using the whole length of the
7509       // base.
7510       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7511           !OAE->getLowerBound())
7512         return CGF.getTypeSize(BaseTy);
7513 
7514       llvm::Value *ElemSize;
7515       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7516         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7517       } else {
7518         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7519         assert(ATy && "Expecting array type if not a pointer type.");
7520         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7521       }
7522 
7523       // If we don't have a length at this point, that is because we have an
7524       // array section with a single element.
7525       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7526         return ElemSize;
7527 
7528       if (const Expr *LenExpr = OAE->getLength()) {
7529         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7530         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7531                                              CGF.getContext().getSizeType(),
7532                                              LenExpr->getExprLoc());
7533         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7534       }
7535       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7536              OAE->getLowerBound() && "expected array_section[lb:].");
7537       // Size = sizetype - lb * elemtype;
7538       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7539       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7540       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7541                                        CGF.getContext().getSizeType(),
7542                                        OAE->getLowerBound()->getExprLoc());
7543       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7544       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7545       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7546       LengthVal = CGF.Builder.CreateSelect(
7547           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7548       return LengthVal;
7549     }
7550     return CGF.getTypeSize(ExprTy);
7551   }
7552 
7553   /// Return the corresponding bits for a given map clause modifier. Add
7554   /// a flag marking the map as a pointer if requested. Add a flag marking the
7555   /// map as the first one of a series of maps that relate to the same map
7556   /// expression.
7557   OpenMPOffloadMappingFlags getMapTypeBits(
7558       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7559       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7560       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7561     OpenMPOffloadMappingFlags Bits =
7562         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7563     switch (MapType) {
7564     case OMPC_MAP_alloc:
7565     case OMPC_MAP_release:
7566       // alloc and release is the default behavior in the runtime library,  i.e.
7567       // if we don't pass any bits alloc/release that is what the runtime is
7568       // going to do. Therefore, we don't need to signal anything for these two
7569       // type modifiers.
7570       break;
7571     case OMPC_MAP_to:
7572       Bits |= OMP_MAP_TO;
7573       break;
7574     case OMPC_MAP_from:
7575       Bits |= OMP_MAP_FROM;
7576       break;
7577     case OMPC_MAP_tofrom:
7578       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7579       break;
7580     case OMPC_MAP_delete:
7581       Bits |= OMP_MAP_DELETE;
7582       break;
7583     case OMPC_MAP_unknown:
7584       llvm_unreachable("Unexpected map type!");
7585     }
7586     if (AddPtrFlag)
7587       Bits |= OMP_MAP_PTR_AND_OBJ;
7588     if (AddIsTargetParamFlag)
7589       Bits |= OMP_MAP_TARGET_PARAM;
7590     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7591       Bits |= OMP_MAP_ALWAYS;
7592     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7593       Bits |= OMP_MAP_CLOSE;
7594     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7595         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7596       Bits |= OMP_MAP_PRESENT;
7597     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7598       Bits |= OMP_MAP_OMPX_HOLD;
7599     if (IsNonContiguous)
7600       Bits |= OMP_MAP_NON_CONTIG;
7601     return Bits;
7602   }
7603 
7604   /// Return true if the provided expression is a final array section. A
7605   /// final array section, is one whose length can't be proved to be one.
7606   bool isFinalArraySectionExpression(const Expr *E) const {
7607     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7608 
7609     // It is not an array section and therefore not a unity-size one.
7610     if (!OASE)
7611       return false;
7612 
7613     // An array section with no colon always refer to a single element.
7614     if (OASE->getColonLocFirst().isInvalid())
7615       return false;
7616 
7617     const Expr *Length = OASE->getLength();
7618 
7619     // If we don't have a length we have to check if the array has size 1
7620     // for this dimension. Also, we should always expect a length if the
7621     // base type is pointer.
7622     if (!Length) {
7623       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7624                              OASE->getBase()->IgnoreParenImpCasts())
7625                              .getCanonicalType();
7626       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7627         return ATy->getSize().getSExtValue() != 1;
7628       // If we don't have a constant dimension length, we have to consider
7629       // the current section as having any size, so it is not necessarily
7630       // unitary. If it happen to be unity size, that's user fault.
7631       return true;
7632     }
7633 
7634     // Check if the length evaluates to 1.
7635     Expr::EvalResult Result;
7636     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7637       return true; // Can have more that size 1.
7638 
7639     llvm::APSInt ConstLength = Result.Val.getInt();
7640     return ConstLength.getSExtValue() != 1;
7641   }
7642 
7643   /// Generate the base pointers, section pointers, sizes, map type bits, and
7644   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7645   /// map type, map or motion modifiers, and expression components.
7646   /// \a IsFirstComponent should be set to true if the provided set of
7647   /// components is the first associated with a capture.
7648   void generateInfoForComponentList(
7649       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7650       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7651       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7652       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7653       bool IsFirstComponentList, bool IsImplicit,
7654       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7655       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7656       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7657           OverlappedElements = llvm::None) const {
7658     // The following summarizes what has to be generated for each map and the
7659     // types below. The generated information is expressed in this order:
7660     // base pointer, section pointer, size, flags
7661     // (to add to the ones that come from the map type and modifier).
7662     //
7663     // double d;
7664     // int i[100];
7665     // float *p;
7666     //
7667     // struct S1 {
7668     //   int i;
7669     //   float f[50];
7670     // }
7671     // struct S2 {
7672     //   int i;
7673     //   float f[50];
7674     //   S1 s;
7675     //   double *p;
7676     //   struct S2 *ps;
7677     //   int &ref;
7678     // }
7679     // S2 s;
7680     // S2 *ps;
7681     //
7682     // map(d)
7683     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7684     //
7685     // map(i)
7686     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7687     //
7688     // map(i[1:23])
7689     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7690     //
7691     // map(p)
7692     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7693     //
7694     // map(p[1:24])
7695     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7696     // in unified shared memory mode or for local pointers
7697     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7698     //
7699     // map(s)
7700     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7701     //
7702     // map(s.i)
7703     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7704     //
7705     // map(s.s.f)
7706     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7707     //
7708     // map(s.p)
7709     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7710     //
7711     // map(to: s.p[:22])
7712     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7713     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7714     // &(s.p), &(s.p[0]), 22*sizeof(double),
7715     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7716     // (*) alloc space for struct members, only this is a target parameter
7717     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7718     //      optimizes this entry out, same in the examples below)
7719     // (***) map the pointee (map: to)
7720     //
7721     // map(to: s.ref)
7722     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7723     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7724     // (*) alloc space for struct members, only this is a target parameter
7725     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7726     //      optimizes this entry out, same in the examples below)
7727     // (***) map the pointee (map: to)
7728     //
7729     // map(s.ps)
7730     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7731     //
7732     // map(from: s.ps->s.i)
7733     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7734     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7735     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7736     //
7737     // map(to: s.ps->ps)
7738     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7739     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7740     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7741     //
7742     // map(s.ps->ps->ps)
7743     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7744     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7745     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7746     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7747     //
7748     // map(to: s.ps->ps->s.f[:22])
7749     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7750     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7751     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7752     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7753     //
7754     // map(ps)
7755     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7756     //
7757     // map(ps->i)
7758     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7759     //
7760     // map(ps->s.f)
7761     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7762     //
7763     // map(from: ps->p)
7764     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7765     //
7766     // map(to: ps->p[:22])
7767     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7768     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7769     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7770     //
7771     // map(ps->ps)
7772     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7773     //
7774     // map(from: ps->ps->s.i)
7775     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7776     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7777     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7778     //
7779     // map(from: ps->ps->ps)
7780     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7781     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7782     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7783     //
7784     // map(ps->ps->ps->ps)
7785     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7786     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7787     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7788     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7789     //
7790     // map(to: ps->ps->ps->s.f[:22])
7791     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7792     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7793     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7794     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7795     //
7796     // map(to: s.f[:22]) map(from: s.p[:33])
7797     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7798     //     sizeof(double*) (**), TARGET_PARAM
7799     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7800     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7801     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7802     // (*) allocate contiguous space needed to fit all mapped members even if
7803     //     we allocate space for members not mapped (in this example,
7804     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7805     //     them as well because they fall between &s.f[0] and &s.p)
7806     //
7807     // map(from: s.f[:22]) map(to: ps->p[:33])
7808     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7809     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7810     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7811     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7812     // (*) the struct this entry pertains to is the 2nd element in the list of
7813     //     arguments, hence MEMBER_OF(2)
7814     //
7815     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7816     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7817     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7818     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7819     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7820     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7821     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7822     // (*) the struct this entry pertains to is the 4th element in the list
7823     //     of arguments, hence MEMBER_OF(4)
7824 
7825     // Track if the map information being generated is the first for a capture.
7826     bool IsCaptureFirstInfo = IsFirstComponentList;
7827     // When the variable is on a declare target link or in a to clause with
7828     // unified memory, a reference is needed to hold the host/device address
7829     // of the variable.
7830     bool RequiresReference = false;
7831 
7832     // Scan the components from the base to the complete expression.
7833     auto CI = Components.rbegin();
7834     auto CE = Components.rend();
7835     auto I = CI;
7836 
7837     // Track if the map information being generated is the first for a list of
7838     // components.
7839     bool IsExpressionFirstInfo = true;
7840     bool FirstPointerInComplexData = false;
7841     Address BP = Address::invalid();
7842     const Expr *AssocExpr = I->getAssociatedExpression();
7843     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7844     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7845     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7846 
7847     if (isa<MemberExpr>(AssocExpr)) {
7848       // The base is the 'this' pointer. The content of the pointer is going
7849       // to be the base of the field being mapped.
7850       BP = CGF.LoadCXXThisAddress();
7851     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7852                (OASE &&
7853                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7854       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7855     } else if (OAShE &&
7856                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7857       BP = Address(
7858           CGF.EmitScalarExpr(OAShE->getBase()),
7859           CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7860           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7861     } else {
7862       // The base is the reference to the variable.
7863       // BP = &Var.
7864       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7865       if (const auto *VD =
7866               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7867         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7868                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7869           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7870               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7871                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7872             RequiresReference = true;
7873             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7874           }
7875         }
7876       }
7877 
7878       // If the variable is a pointer and is being dereferenced (i.e. is not
7879       // the last component), the base has to be the pointer itself, not its
7880       // reference. References are ignored for mapping purposes.
7881       QualType Ty =
7882           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7883       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7884         // No need to generate individual map information for the pointer, it
7885         // can be associated with the combined storage if shared memory mode is
7886         // active or the base declaration is not global variable.
7887         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7888         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7889             !VD || VD->hasLocalStorage())
7890           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7891         else
7892           FirstPointerInComplexData = true;
7893         ++I;
7894       }
7895     }
7896 
7897     // Track whether a component of the list should be marked as MEMBER_OF some
7898     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7899     // in a component list should be marked as MEMBER_OF, all subsequent entries
7900     // do not belong to the base struct. E.g.
7901     // struct S2 s;
7902     // s.ps->ps->ps->f[:]
7903     //   (1) (2) (3) (4)
7904     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7905     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7906     // is the pointee of ps(2) which is not member of struct s, so it should not
7907     // be marked as such (it is still PTR_AND_OBJ).
7908     // The variable is initialized to false so that PTR_AND_OBJ entries which
7909     // are not struct members are not considered (e.g. array of pointers to
7910     // data).
7911     bool ShouldBeMemberOf = false;
7912 
7913     // Variable keeping track of whether or not we have encountered a component
7914     // in the component list which is a member expression. Useful when we have a
7915     // pointer or a final array section, in which case it is the previous
7916     // component in the list which tells us whether we have a member expression.
7917     // E.g. X.f[:]
7918     // While processing the final array section "[:]" it is "f" which tells us
7919     // whether we are dealing with a member of a declared struct.
7920     const MemberExpr *EncounteredME = nullptr;
7921 
7922     // Track for the total number of dimension. Start from one for the dummy
7923     // dimension.
7924     uint64_t DimSize = 1;
7925 
7926     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7927     bool IsPrevMemberReference = false;
7928 
7929     for (; I != CE; ++I) {
7930       // If the current component is member of a struct (parent struct) mark it.
7931       if (!EncounteredME) {
7932         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7933         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7934         // as MEMBER_OF the parent struct.
7935         if (EncounteredME) {
7936           ShouldBeMemberOf = true;
7937           // Do not emit as complex pointer if this is actually not array-like
7938           // expression.
7939           if (FirstPointerInComplexData) {
7940             QualType Ty = std::prev(I)
7941                               ->getAssociatedDeclaration()
7942                               ->getType()
7943                               .getNonReferenceType();
7944             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7945             FirstPointerInComplexData = false;
7946           }
7947         }
7948       }
7949 
7950       auto Next = std::next(I);
7951 
7952       // We need to generate the addresses and sizes if this is the last
7953       // component, if the component is a pointer or if it is an array section
7954       // whose length can't be proved to be one. If this is a pointer, it
7955       // becomes the base address for the following components.
7956 
7957       // A final array section, is one whose length can't be proved to be one.
7958       // If the map item is non-contiguous then we don't treat any array section
7959       // as final array section.
7960       bool IsFinalArraySection =
7961           !IsNonContiguous &&
7962           isFinalArraySectionExpression(I->getAssociatedExpression());
7963 
7964       // If we have a declaration for the mapping use that, otherwise use
7965       // the base declaration of the map clause.
7966       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7967                                      ? I->getAssociatedDeclaration()
7968                                      : BaseDecl;
7969       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7970                                                : MapExpr;
7971 
7972       // Get information on whether the element is a pointer. Have to do a
7973       // special treatment for array sections given that they are built-in
7974       // types.
7975       const auto *OASE =
7976           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7977       const auto *OAShE =
7978           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7979       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7980       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7981       bool IsPointer =
7982           OAShE ||
7983           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7984                        .getCanonicalType()
7985                        ->isAnyPointerType()) ||
7986           I->getAssociatedExpression()->getType()->isAnyPointerType();
7987       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7988                                MapDecl &&
7989                                MapDecl->getType()->isLValueReferenceType();
7990       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7991 
7992       if (OASE)
7993         ++DimSize;
7994 
7995       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7996           IsFinalArraySection) {
7997         // If this is not the last component, we expect the pointer to be
7998         // associated with an array expression or member expression.
7999         assert((Next == CE ||
8000                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8001                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8002                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
8003                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8004                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8005                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8006                "Unexpected expression");
8007 
8008         Address LB = Address::invalid();
8009         Address LowestElem = Address::invalid();
8010         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8011                                        const MemberExpr *E) {
8012           const Expr *BaseExpr = E->getBase();
8013           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
8014           // scalar.
8015           LValue BaseLV;
8016           if (E->isArrow()) {
8017             LValueBaseInfo BaseInfo;
8018             TBAAAccessInfo TBAAInfo;
8019             Address Addr =
8020                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8021             QualType PtrTy = BaseExpr->getType()->getPointeeType();
8022             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8023           } else {
8024             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8025           }
8026           return BaseLV;
8027         };
8028         if (OAShE) {
8029           LowestElem = LB =
8030               Address(CGF.EmitScalarExpr(OAShE->getBase()),
8031                       CGF.ConvertTypeForMem(
8032                           OAShE->getBase()->getType()->getPointeeType()),
8033                       CGF.getContext().getTypeAlignInChars(
8034                           OAShE->getBase()->getType()));
8035         } else if (IsMemberReference) {
8036           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8037           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8038           LowestElem = CGF.EmitLValueForFieldInitialization(
8039                               BaseLVal, cast<FieldDecl>(MapDecl))
8040                            .getAddress(CGF);
8041           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8042                    .getAddress(CGF);
8043         } else {
8044           LowestElem = LB =
8045               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8046                   .getAddress(CGF);
8047         }
8048 
8049         // If this component is a pointer inside the base struct then we don't
8050         // need to create any entry for it - it will be combined with the object
8051         // it is pointing to into a single PTR_AND_OBJ entry.
8052         bool IsMemberPointerOrAddr =
8053             EncounteredME &&
8054             (((IsPointer || ForDeviceAddr) &&
8055               I->getAssociatedExpression() == EncounteredME) ||
8056              (IsPrevMemberReference && !IsPointer) ||
8057              (IsMemberReference && Next != CE &&
8058               !Next->getAssociatedExpression()->getType()->isPointerType()));
8059         if (!OverlappedElements.empty() && Next == CE) {
8060           // Handle base element with the info for overlapped elements.
8061           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8062           assert(!IsPointer &&
8063                  "Unexpected base element with the pointer type.");
8064           // Mark the whole struct as the struct that requires allocation on the
8065           // device.
8066           PartialStruct.LowestElem = {0, LowestElem};
8067           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8068               I->getAssociatedExpression()->getType());
8069           Address HB = CGF.Builder.CreateConstGEP(
8070               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8071                   LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
8072               TypeSize.getQuantity() - 1);
8073           PartialStruct.HighestElem = {
8074               std::numeric_limits<decltype(
8075                   PartialStruct.HighestElem.first)>::max(),
8076               HB};
8077           PartialStruct.Base = BP;
8078           PartialStruct.LB = LB;
8079           assert(
8080               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8081               "Overlapped elements must be used only once for the variable.");
8082           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8083           // Emit data for non-overlapped data.
8084           OpenMPOffloadMappingFlags Flags =
8085               OMP_MAP_MEMBER_OF |
8086               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8087                              /*AddPtrFlag=*/false,
8088                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8089           llvm::Value *Size = nullptr;
8090           // Do bitcopy of all non-overlapped structure elements.
8091           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8092                    Component : OverlappedElements) {
8093             Address ComponentLB = Address::invalid();
8094             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8095                  Component) {
8096               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8097                 const auto *FD = dyn_cast<FieldDecl>(VD);
8098                 if (FD && FD->getType()->isLValueReferenceType()) {
8099                   const auto *ME =
8100                       cast<MemberExpr>(MC.getAssociatedExpression());
8101                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8102                   ComponentLB =
8103                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8104                           .getAddress(CGF);
8105                 } else {
8106                   ComponentLB =
8107                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8108                           .getAddress(CGF);
8109                 }
8110                 Size = CGF.Builder.CreatePtrDiff(
8111                     CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8112                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8113                 break;
8114               }
8115             }
8116             assert(Size && "Failed to determine structure size");
8117             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8118             CombinedInfo.BasePointers.push_back(BP.getPointer());
8119             CombinedInfo.Pointers.push_back(LB.getPointer());
8120             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8121                 Size, CGF.Int64Ty, /*isSigned=*/true));
8122             CombinedInfo.Types.push_back(Flags);
8123             CombinedInfo.Mappers.push_back(nullptr);
8124             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8125                                                                       : 1);
8126             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8127           }
8128           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8129           CombinedInfo.BasePointers.push_back(BP.getPointer());
8130           CombinedInfo.Pointers.push_back(LB.getPointer());
8131           Size = CGF.Builder.CreatePtrDiff(
8132               CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8133               CGF.EmitCastToVoidPtr(LB.getPointer()));
8134           CombinedInfo.Sizes.push_back(
8135               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8136           CombinedInfo.Types.push_back(Flags);
8137           CombinedInfo.Mappers.push_back(nullptr);
8138           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8139                                                                     : 1);
8140           break;
8141         }
8142         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8143         if (!IsMemberPointerOrAddr ||
8144             (Next == CE && MapType != OMPC_MAP_unknown)) {
8145           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8146           CombinedInfo.BasePointers.push_back(BP.getPointer());
8147           CombinedInfo.Pointers.push_back(LB.getPointer());
8148           CombinedInfo.Sizes.push_back(
8149               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8150           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8151                                                                     : 1);
8152 
8153           // If Mapper is valid, the last component inherits the mapper.
8154           bool HasMapper = Mapper && Next == CE;
8155           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8156 
8157           // We need to add a pointer flag for each map that comes from the
8158           // same expression except for the first one. We also need to signal
8159           // this map is the first one that relates with the current capture
8160           // (there is a set of entries for each capture).
8161           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8162               MapType, MapModifiers, MotionModifiers, IsImplicit,
8163               !IsExpressionFirstInfo || RequiresReference ||
8164                   FirstPointerInComplexData || IsMemberReference,
8165               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8166 
8167           if (!IsExpressionFirstInfo || IsMemberReference) {
8168             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8169             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8170             if (IsPointer || (IsMemberReference && Next != CE))
8171               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8172                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8173 
8174             if (ShouldBeMemberOf) {
8175               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8176               // should be later updated with the correct value of MEMBER_OF.
8177               Flags |= OMP_MAP_MEMBER_OF;
8178               // From now on, all subsequent PTR_AND_OBJ entries should not be
8179               // marked as MEMBER_OF.
8180               ShouldBeMemberOf = false;
8181             }
8182           }
8183 
8184           CombinedInfo.Types.push_back(Flags);
8185         }
8186 
8187         // If we have encountered a member expression so far, keep track of the
8188         // mapped member. If the parent is "*this", then the value declaration
8189         // is nullptr.
8190         if (EncounteredME) {
8191           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8192           unsigned FieldIndex = FD->getFieldIndex();
8193 
8194           // Update info about the lowest and highest elements for this struct
8195           if (!PartialStruct.Base.isValid()) {
8196             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8197             if (IsFinalArraySection) {
8198               Address HB =
8199                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8200                       .getAddress(CGF);
8201               PartialStruct.HighestElem = {FieldIndex, HB};
8202             } else {
8203               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8204             }
8205             PartialStruct.Base = BP;
8206             PartialStruct.LB = BP;
8207           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8208             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8209           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8210             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8211           }
8212         }
8213 
8214         // Need to emit combined struct for array sections.
8215         if (IsFinalArraySection || IsNonContiguous)
8216           PartialStruct.IsArraySection = true;
8217 
8218         // If we have a final array section, we are done with this expression.
8219         if (IsFinalArraySection)
8220           break;
8221 
8222         // The pointer becomes the base for the next element.
8223         if (Next != CE)
8224           BP = IsMemberReference ? LowestElem : LB;
8225 
8226         IsExpressionFirstInfo = false;
8227         IsCaptureFirstInfo = false;
8228         FirstPointerInComplexData = false;
8229         IsPrevMemberReference = IsMemberReference;
8230       } else if (FirstPointerInComplexData) {
8231         QualType Ty = Components.rbegin()
8232                           ->getAssociatedDeclaration()
8233                           ->getType()
8234                           .getNonReferenceType();
8235         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8236         FirstPointerInComplexData = false;
8237       }
8238     }
8239     // If ran into the whole component - allocate the space for the whole
8240     // record.
8241     if (!EncounteredME)
8242       PartialStruct.HasCompleteRecord = true;
8243 
8244     if (!IsNonContiguous)
8245       return;
8246 
8247     const ASTContext &Context = CGF.getContext();
8248 
8249     // For supporting stride in array section, we need to initialize the first
8250     // dimension size as 1, first offset as 0, and first count as 1
8251     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8252     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8253     MapValuesArrayTy CurStrides;
8254     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8255     uint64_t ElementTypeSize;
8256 
8257     // Collect Size information for each dimension and get the element size as
8258     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8259     // should be [10, 10] and the first stride is 4 btyes.
8260     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8261          Components) {
8262       const Expr *AssocExpr = Component.getAssociatedExpression();
8263       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8264 
8265       if (!OASE)
8266         continue;
8267 
8268       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8269       auto *CAT = Context.getAsConstantArrayType(Ty);
8270       auto *VAT = Context.getAsVariableArrayType(Ty);
8271 
8272       // We need all the dimension size except for the last dimension.
8273       assert((VAT || CAT || &Component == &*Components.begin()) &&
8274              "Should be either ConstantArray or VariableArray if not the "
8275              "first Component");
8276 
8277       // Get element size if CurStrides is empty.
8278       if (CurStrides.empty()) {
8279         const Type *ElementType = nullptr;
8280         if (CAT)
8281           ElementType = CAT->getElementType().getTypePtr();
8282         else if (VAT)
8283           ElementType = VAT->getElementType().getTypePtr();
8284         else
8285           assert(&Component == &*Components.begin() &&
8286                  "Only expect pointer (non CAT or VAT) when this is the "
8287                  "first Component");
8288         // If ElementType is null, then it means the base is a pointer
8289         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8290         // for next iteration.
8291         if (ElementType) {
8292           // For the case that having pointer as base, we need to remove one
8293           // level of indirection.
8294           if (&Component != &*Components.begin())
8295             ElementType = ElementType->getPointeeOrArrayElementType();
8296           ElementTypeSize =
8297               Context.getTypeSizeInChars(ElementType).getQuantity();
8298           CurStrides.push_back(
8299               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8300         }
8301       }
8302       // Get dimension value except for the last dimension since we don't need
8303       // it.
8304       if (DimSizes.size() < Components.size() - 1) {
8305         if (CAT)
8306           DimSizes.push_back(llvm::ConstantInt::get(
8307               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8308         else if (VAT)
8309           DimSizes.push_back(CGF.Builder.CreateIntCast(
8310               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8311               /*IsSigned=*/false));
8312       }
8313     }
8314 
8315     // Skip the dummy dimension since we have already have its information.
8316     auto *DI = DimSizes.begin() + 1;
8317     // Product of dimension.
8318     llvm::Value *DimProd =
8319         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8320 
8321     // Collect info for non-contiguous. Notice that offset, count, and stride
8322     // are only meaningful for array-section, so we insert a null for anything
8323     // other than array-section.
8324     // Also, the size of offset, count, and stride are not the same as
8325     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8326     // count, and stride are the same as the number of non-contiguous
8327     // declaration in target update to/from clause.
8328     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8329          Components) {
8330       const Expr *AssocExpr = Component.getAssociatedExpression();
8331 
8332       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8333         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8334             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8335             /*isSigned=*/false);
8336         CurOffsets.push_back(Offset);
8337         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8338         CurStrides.push_back(CurStrides.back());
8339         continue;
8340       }
8341 
8342       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8343 
8344       if (!OASE)
8345         continue;
8346 
8347       // Offset
8348       const Expr *OffsetExpr = OASE->getLowerBound();
8349       llvm::Value *Offset = nullptr;
8350       if (!OffsetExpr) {
8351         // If offset is absent, then we just set it to zero.
8352         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8353       } else {
8354         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8355                                            CGF.Int64Ty,
8356                                            /*isSigned=*/false);
8357       }
8358       CurOffsets.push_back(Offset);
8359 
8360       // Count
8361       const Expr *CountExpr = OASE->getLength();
8362       llvm::Value *Count = nullptr;
8363       if (!CountExpr) {
8364         // In Clang, once a high dimension is an array section, we construct all
8365         // the lower dimension as array section, however, for case like
8366         // arr[0:2][2], Clang construct the inner dimension as an array section
8367         // but it actually is not in an array section form according to spec.
8368         if (!OASE->getColonLocFirst().isValid() &&
8369             !OASE->getColonLocSecond().isValid()) {
8370           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8371         } else {
8372           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8373           // When the length is absent it defaults to ⌈(size −
8374           // lower-bound)/stride⌉, where size is the size of the array
8375           // dimension.
8376           const Expr *StrideExpr = OASE->getStride();
8377           llvm::Value *Stride =
8378               StrideExpr
8379                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8380                                               CGF.Int64Ty, /*isSigned=*/false)
8381                   : nullptr;
8382           if (Stride)
8383             Count = CGF.Builder.CreateUDiv(
8384                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8385           else
8386             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8387         }
8388       } else {
8389         Count = CGF.EmitScalarExpr(CountExpr);
8390       }
8391       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8392       CurCounts.push_back(Count);
8393 
8394       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8395       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8396       //              Offset      Count     Stride
8397       //    D0          0           1         4    (int)    <- dummy dimension
8398       //    D1          0           2         8    (2 * (1) * 4)
8399       //    D2          1           2         20   (1 * (1 * 5) * 4)
8400       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8401       const Expr *StrideExpr = OASE->getStride();
8402       llvm::Value *Stride =
8403           StrideExpr
8404               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8405                                           CGF.Int64Ty, /*isSigned=*/false)
8406               : nullptr;
8407       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8408       if (Stride)
8409         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8410       else
8411         CurStrides.push_back(DimProd);
8412       if (DI != DimSizes.end())
8413         ++DI;
8414     }
8415 
8416     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8417     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8418     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8419   }
8420 
8421   /// Return the adjusted map modifiers if the declaration a capture refers to
8422   /// appears in a first-private clause. This is expected to be used only with
8423   /// directives that start with 'target'.
8424   MappableExprsHandler::OpenMPOffloadMappingFlags
8425   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8426     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8427 
8428     // A first private variable captured by reference will use only the
8429     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8430     // declaration is known as first-private in this handler.
8431     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8432       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8433         return MappableExprsHandler::OMP_MAP_TO |
8434                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8435       return MappableExprsHandler::OMP_MAP_PRIVATE |
8436              MappableExprsHandler::OMP_MAP_TO;
8437     }
8438     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8439     if (I != LambdasMap.end())
8440       // for map(to: lambda): using user specified map type.
8441       return getMapTypeBits(
8442           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8443           /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8444           /*AddPtrFlag=*/false,
8445           /*AddIsTargetParamFlag=*/false,
8446           /*isNonContiguous=*/false);
8447     return MappableExprsHandler::OMP_MAP_TO |
8448            MappableExprsHandler::OMP_MAP_FROM;
8449   }
8450 
8451   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8452     // Rotate by getFlagMemberOffset() bits.
8453     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8454                                                   << getFlagMemberOffset());
8455   }
8456 
8457   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8458                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8459     // If the entry is PTR_AND_OBJ but has not been marked with the special
8460     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8461     // marked as MEMBER_OF.
8462     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8463         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8464       return;
8465 
8466     // Reset the placeholder value to prepare the flag for the assignment of the
8467     // proper MEMBER_OF value.
8468     Flags &= ~OMP_MAP_MEMBER_OF;
8469     Flags |= MemberOfFlag;
8470   }
8471 
8472   void getPlainLayout(const CXXRecordDecl *RD,
8473                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8474                       bool AsBase) const {
8475     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8476 
8477     llvm::StructType *St =
8478         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8479 
8480     unsigned NumElements = St->getNumElements();
8481     llvm::SmallVector<
8482         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8483         RecordLayout(NumElements);
8484 
8485     // Fill bases.
8486     for (const auto &I : RD->bases()) {
8487       if (I.isVirtual())
8488         continue;
8489       const auto *Base = I.getType()->getAsCXXRecordDecl();
8490       // Ignore empty bases.
8491       if (Base->isEmpty() || CGF.getContext()
8492                                  .getASTRecordLayout(Base)
8493                                  .getNonVirtualSize()
8494                                  .isZero())
8495         continue;
8496 
8497       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8498       RecordLayout[FieldIndex] = Base;
8499     }
8500     // Fill in virtual bases.
8501     for (const auto &I : RD->vbases()) {
8502       const auto *Base = I.getType()->getAsCXXRecordDecl();
8503       // Ignore empty bases.
8504       if (Base->isEmpty())
8505         continue;
8506       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8507       if (RecordLayout[FieldIndex])
8508         continue;
8509       RecordLayout[FieldIndex] = Base;
8510     }
8511     // Fill in all the fields.
8512     assert(!RD->isUnion() && "Unexpected union.");
8513     for (const auto *Field : RD->fields()) {
8514       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8515       // will fill in later.)
8516       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8517         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8518         RecordLayout[FieldIndex] = Field;
8519       }
8520     }
8521     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8522              &Data : RecordLayout) {
8523       if (Data.isNull())
8524         continue;
8525       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8526         getPlainLayout(Base, Layout, /*AsBase=*/true);
8527       else
8528         Layout.push_back(Data.get<const FieldDecl *>());
8529     }
8530   }
8531 
8532   /// Generate all the base pointers, section pointers, sizes, map types, and
8533   /// mappers for the extracted mappable expressions (all included in \a
8534   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8535   /// pair of the relevant declaration and index where it occurs is appended to
8536   /// the device pointers info array.
8537   void generateAllInfoForClauses(
8538       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8539       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8540           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8541     // We have to process the component lists that relate with the same
8542     // declaration in a single chunk so that we can generate the map flags
8543     // correctly. Therefore, we organize all lists in a map.
8544     enum MapKind { Present, Allocs, Other, Total };
8545     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8546                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8547         Info;
8548 
8549     // Helper function to fill the information map for the different supported
8550     // clauses.
8551     auto &&InfoGen =
8552         [&Info, &SkipVarSet](
8553             const ValueDecl *D, MapKind Kind,
8554             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8555             OpenMPMapClauseKind MapType,
8556             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8557             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8558             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8559             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8560           if (SkipVarSet.contains(D))
8561             return;
8562           auto It = Info.find(D);
8563           if (It == Info.end())
8564             It = Info
8565                      .insert(std::make_pair(
8566                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8567                      .first;
8568           It->second[Kind].emplace_back(
8569               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8570               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8571         };
8572 
8573     for (const auto *Cl : Clauses) {
8574       const auto *C = dyn_cast<OMPMapClause>(Cl);
8575       if (!C)
8576         continue;
8577       MapKind Kind = Other;
8578       if (llvm::is_contained(C->getMapTypeModifiers(),
8579                              OMPC_MAP_MODIFIER_present))
8580         Kind = Present;
8581       else if (C->getMapType() == OMPC_MAP_alloc)
8582         Kind = Allocs;
8583       const auto *EI = C->getVarRefs().begin();
8584       for (const auto L : C->component_lists()) {
8585         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8586         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8587                 C->getMapTypeModifiers(), llvm::None,
8588                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8589                 E);
8590         ++EI;
8591       }
8592     }
8593     for (const auto *Cl : Clauses) {
8594       const auto *C = dyn_cast<OMPToClause>(Cl);
8595       if (!C)
8596         continue;
8597       MapKind Kind = Other;
8598       if (llvm::is_contained(C->getMotionModifiers(),
8599                              OMPC_MOTION_MODIFIER_present))
8600         Kind = Present;
8601       const auto *EI = C->getVarRefs().begin();
8602       for (const auto L : C->component_lists()) {
8603         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8604                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8605                 C->isImplicit(), std::get<2>(L), *EI);
8606         ++EI;
8607       }
8608     }
8609     for (const auto *Cl : Clauses) {
8610       const auto *C = dyn_cast<OMPFromClause>(Cl);
8611       if (!C)
8612         continue;
8613       MapKind Kind = Other;
8614       if (llvm::is_contained(C->getMotionModifiers(),
8615                              OMPC_MOTION_MODIFIER_present))
8616         Kind = Present;
8617       const auto *EI = C->getVarRefs().begin();
8618       for (const auto L : C->component_lists()) {
8619         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8620                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8621                 C->isImplicit(), std::get<2>(L), *EI);
8622         ++EI;
8623       }
8624     }
8625 
8626     // Look at the use_device_ptr clause information and mark the existing map
8627     // entries as such. If there is no map information for an entry in the
8628     // use_device_ptr list, we create one with map type 'alloc' and zero size
8629     // section. It is the user fault if that was not mapped before. If there is
8630     // no map information and the pointer is a struct member, then we defer the
8631     // emission of that entry until the whole struct has been processed.
8632     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8633                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8634         DeferredInfo;
8635     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8636 
8637     for (const auto *Cl : Clauses) {
8638       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8639       if (!C)
8640         continue;
8641       for (const auto L : C->component_lists()) {
8642         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8643             std::get<1>(L);
8644         assert(!Components.empty() &&
8645                "Not expecting empty list of components!");
8646         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8647         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8648         const Expr *IE = Components.back().getAssociatedExpression();
8649         // If the first component is a member expression, we have to look into
8650         // 'this', which maps to null in the map of map information. Otherwise
8651         // look directly for the information.
8652         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8653 
8654         // We potentially have map information for this declaration already.
8655         // Look for the first set of components that refer to it.
8656         if (It != Info.end()) {
8657           bool Found = false;
8658           for (auto &Data : It->second) {
8659             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8660               return MI.Components.back().getAssociatedDeclaration() == VD;
8661             });
8662             // If we found a map entry, signal that the pointer has to be
8663             // returned and move on to the next declaration. Exclude cases where
8664             // the base pointer is mapped as array subscript, array section or
8665             // array shaping. The base address is passed as a pointer to base in
8666             // this case and cannot be used as a base for use_device_ptr list
8667             // item.
8668             if (CI != Data.end()) {
8669               auto PrevCI = std::next(CI->Components.rbegin());
8670               const auto *VarD = dyn_cast<VarDecl>(VD);
8671               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8672                   isa<MemberExpr>(IE) ||
8673                   !VD->getType().getNonReferenceType()->isPointerType() ||
8674                   PrevCI == CI->Components.rend() ||
8675                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8676                   VarD->hasLocalStorage()) {
8677                 CI->ReturnDevicePointer = true;
8678                 Found = true;
8679                 break;
8680               }
8681             }
8682           }
8683           if (Found)
8684             continue;
8685         }
8686 
8687         // We didn't find any match in our map information - generate a zero
8688         // size array section - if the pointer is a struct member we defer this
8689         // action until the whole struct has been processed.
8690         if (isa<MemberExpr>(IE)) {
8691           // Insert the pointer into Info to be processed by
8692           // generateInfoForComponentList. Because it is a member pointer
8693           // without a pointee, no entry will be generated for it, therefore
8694           // we need to generate one after the whole struct has been processed.
8695           // Nonetheless, generateInfoForComponentList must be called to take
8696           // the pointer into account for the calculation of the range of the
8697           // partial struct.
8698           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8699                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8700                   nullptr);
8701           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8702         } else {
8703           llvm::Value *Ptr =
8704               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8705           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8706           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8707           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8708           UseDevicePtrCombinedInfo.Sizes.push_back(
8709               llvm::Constant::getNullValue(CGF.Int64Ty));
8710           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8711           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8712         }
8713       }
8714     }
8715 
8716     // Look at the use_device_addr clause information and mark the existing map
8717     // entries as such. If there is no map information for an entry in the
8718     // use_device_addr list, we create one with map type 'alloc' and zero size
8719     // section. It is the user fault if that was not mapped before. If there is
8720     // no map information and the pointer is a struct member, then we defer the
8721     // emission of that entry until the whole struct has been processed.
8722     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8723     for (const auto *Cl : Clauses) {
8724       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8725       if (!C)
8726         continue;
8727       for (const auto L : C->component_lists()) {
8728         assert(!std::get<1>(L).empty() &&
8729                "Not expecting empty list of components!");
8730         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8731         if (!Processed.insert(VD).second)
8732           continue;
8733         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8734         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8735         // If the first component is a member expression, we have to look into
8736         // 'this', which maps to null in the map of map information. Otherwise
8737         // look directly for the information.
8738         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8739 
8740         // We potentially have map information for this declaration already.
8741         // Look for the first set of components that refer to it.
8742         if (It != Info.end()) {
8743           bool Found = false;
8744           for (auto &Data : It->second) {
8745             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8746               return MI.Components.back().getAssociatedDeclaration() == VD;
8747             });
8748             // If we found a map entry, signal that the pointer has to be
8749             // returned and move on to the next declaration.
8750             if (CI != Data.end()) {
8751               CI->ReturnDevicePointer = true;
8752               Found = true;
8753               break;
8754             }
8755           }
8756           if (Found)
8757             continue;
8758         }
8759 
8760         // We didn't find any match in our map information - generate a zero
8761         // size array section - if the pointer is a struct member we defer this
8762         // action until the whole struct has been processed.
8763         if (isa<MemberExpr>(IE)) {
8764           // Insert the pointer into Info to be processed by
8765           // generateInfoForComponentList. Because it is a member pointer
8766           // without a pointee, no entry will be generated for it, therefore
8767           // we need to generate one after the whole struct has been processed.
8768           // Nonetheless, generateInfoForComponentList must be called to take
8769           // the pointer into account for the calculation of the range of the
8770           // partial struct.
8771           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8772                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8773                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8774           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8775         } else {
8776           llvm::Value *Ptr;
8777           if (IE->isGLValue())
8778             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8779           else
8780             Ptr = CGF.EmitScalarExpr(IE);
8781           CombinedInfo.Exprs.push_back(VD);
8782           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8783           CombinedInfo.Pointers.push_back(Ptr);
8784           CombinedInfo.Sizes.push_back(
8785               llvm::Constant::getNullValue(CGF.Int64Ty));
8786           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8787           CombinedInfo.Mappers.push_back(nullptr);
8788         }
8789       }
8790     }
8791 
8792     for (const auto &Data : Info) {
8793       StructRangeInfoTy PartialStruct;
8794       // Temporary generated information.
8795       MapCombinedInfoTy CurInfo;
8796       const Decl *D = Data.first;
8797       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8798       for (const auto &M : Data.second) {
8799         for (const MapInfo &L : M) {
8800           assert(!L.Components.empty() &&
8801                  "Not expecting declaration with no component lists.");
8802 
8803           // Remember the current base pointer index.
8804           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8805           CurInfo.NonContigInfo.IsNonContiguous =
8806               L.Components.back().isNonContiguous();
8807           generateInfoForComponentList(
8808               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8809               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8810               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8811 
8812           // If this entry relates with a device pointer, set the relevant
8813           // declaration and add the 'return pointer' flag.
8814           if (L.ReturnDevicePointer) {
8815             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8816                    "Unexpected number of mapped base pointers.");
8817 
8818             const ValueDecl *RelevantVD =
8819                 L.Components.back().getAssociatedDeclaration();
8820             assert(RelevantVD &&
8821                    "No relevant declaration related with device pointer??");
8822 
8823             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8824                 RelevantVD);
8825             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8826           }
8827         }
8828       }
8829 
8830       // Append any pending zero-length pointers which are struct members and
8831       // used with use_device_ptr or use_device_addr.
8832       auto CI = DeferredInfo.find(Data.first);
8833       if (CI != DeferredInfo.end()) {
8834         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8835           llvm::Value *BasePtr;
8836           llvm::Value *Ptr;
8837           if (L.ForDeviceAddr) {
8838             if (L.IE->isGLValue())
8839               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8840             else
8841               Ptr = this->CGF.EmitScalarExpr(L.IE);
8842             BasePtr = Ptr;
8843             // Entry is RETURN_PARAM. Also, set the placeholder value
8844             // MEMBER_OF=FFFF so that the entry is later updated with the
8845             // correct value of MEMBER_OF.
8846             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8847           } else {
8848             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8849             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8850                                              L.IE->getExprLoc());
8851             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8852             // placeholder value MEMBER_OF=FFFF so that the entry is later
8853             // updated with the correct value of MEMBER_OF.
8854             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8855                                     OMP_MAP_MEMBER_OF);
8856           }
8857           CurInfo.Exprs.push_back(L.VD);
8858           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8859           CurInfo.Pointers.push_back(Ptr);
8860           CurInfo.Sizes.push_back(
8861               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8862           CurInfo.Mappers.push_back(nullptr);
8863         }
8864       }
8865       // If there is an entry in PartialStruct it means we have a struct with
8866       // individual members mapped. Emit an extra combined entry.
8867       if (PartialStruct.Base.isValid()) {
8868         CurInfo.NonContigInfo.Dims.push_back(0);
8869         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8870       }
8871 
8872       // We need to append the results of this capture to what we already
8873       // have.
8874       CombinedInfo.append(CurInfo);
8875     }
8876     // Append data for use_device_ptr clauses.
8877     CombinedInfo.append(UseDevicePtrCombinedInfo);
8878   }
8879 
8880 public:
8881   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8882       : CurDir(&Dir), CGF(CGF) {
8883     // Extract firstprivate clause information.
8884     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8885       for (const auto *D : C->varlists())
8886         FirstPrivateDecls.try_emplace(
8887             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8888     // Extract implicit firstprivates from uses_allocators clauses.
8889     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8890       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8891         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8892         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8893           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8894                                         /*Implicit=*/true);
8895         else if (const auto *VD = dyn_cast<VarDecl>(
8896                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8897                          ->getDecl()))
8898           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8899       }
8900     }
8901     // Extract device pointer clause information.
8902     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8903       for (auto L : C->component_lists())
8904         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8905     // Extract map information.
8906     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8907       if (C->getMapType() != OMPC_MAP_to)
8908         continue;
8909       for (auto L : C->component_lists()) {
8910         const ValueDecl *VD = std::get<0>(L);
8911         const auto *RD = VD ? VD->getType()
8912                                   .getCanonicalType()
8913                                   .getNonReferenceType()
8914                                   ->getAsCXXRecordDecl()
8915                             : nullptr;
8916         if (RD && RD->isLambda())
8917           LambdasMap.try_emplace(std::get<0>(L), C);
8918       }
8919     }
8920   }
8921 
  /// Constructor for the declare mapper directive. Unlike the executable
  /// directive constructor, no clause information needs to be pre-collected
  /// here; only the directive itself is recorded.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8925 
8926   /// Generate code for the combined entry if we have a partially mapped struct
8927   /// and take care of the mapping flags of the arguments corresponding to
8928   /// individual struct members.
8929   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8930                          MapFlagsArrayTy &CurTypes,
8931                          const StructRangeInfoTy &PartialStruct,
8932                          const ValueDecl *VD = nullptr,
8933                          bool NotTargetParams = true) const {
8934     if (CurTypes.size() == 1 &&
8935         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8936         !PartialStruct.IsArraySection)
8937       return;
8938     Address LBAddr = PartialStruct.LowestElem.second;
8939     Address HBAddr = PartialStruct.HighestElem.second;
8940     if (PartialStruct.HasCompleteRecord) {
8941       LBAddr = PartialStruct.LB;
8942       HBAddr = PartialStruct.LB;
8943     }
8944     CombinedInfo.Exprs.push_back(VD);
8945     // Base is the base of the struct
8946     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8947     // Pointer is the address of the lowest element
8948     llvm::Value *LB = LBAddr.getPointer();
8949     CombinedInfo.Pointers.push_back(LB);
8950     // There should not be a mapper for a combined entry.
8951     CombinedInfo.Mappers.push_back(nullptr);
8952     // Size is (addr of {highest+1} element) - (addr of lowest element)
8953     llvm::Value *HB = HBAddr.getPointer();
8954     llvm::Value *HAddr =
8955         CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
8956     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8957     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8958     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8959     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8960                                                   /*isSigned=*/false);
8961     CombinedInfo.Sizes.push_back(Size);
8962     // Map type is always TARGET_PARAM, if generate info for captures.
8963     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8964                                                  : OMP_MAP_TARGET_PARAM);
8965     // If any element has the present modifier, then make sure the runtime
8966     // doesn't attempt to allocate the struct.
8967     if (CurTypes.end() !=
8968         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8969           return Type & OMP_MAP_PRESENT;
8970         }))
8971       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8972     // Remove TARGET_PARAM flag from the first element
8973     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8974     // If any element has the ompx_hold modifier, then make sure the runtime
8975     // uses the hold reference count for the struct as a whole so that it won't
8976     // be unmapped by an extra dynamic reference count decrement.  Add it to all
8977     // elements as well so the runtime knows which reference count to check
8978     // when determining whether it's time for device-to-host transfers of
8979     // individual elements.
8980     if (CurTypes.end() !=
8981         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8982           return Type & OMP_MAP_OMPX_HOLD;
8983         })) {
8984       CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
8985       for (auto &M : CurTypes)
8986         M |= OMP_MAP_OMPX_HOLD;
8987     }
8988 
8989     // All other current entries will be MEMBER_OF the combined entry
8990     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8991     // 0xFFFF in the MEMBER_OF field).
8992     OpenMPOffloadMappingFlags MemberOfFlag =
8993         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8994     for (auto &M : CurTypes)
8995       setCorrectMemberOfFlag(M, MemberOfFlag);
8996   }
8997 
8998   /// Generate all the base pointers, section pointers, sizes, map types, and
8999   /// mappers for the extracted mappable expressions (all included in \a
9000   /// CombinedInfo). Also, for each item that relates with a device pointer, a
9001   /// pair of the relevant declaration and index where it occurs is appended to
9002   /// the device pointers info array.
9003   void generateAllInfo(
9004       MapCombinedInfoTy &CombinedInfo,
9005       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9006           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9007     assert(CurDir.is<const OMPExecutableDirective *>() &&
9008            "Expect a executable directive");
9009     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9010     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9011   }
9012 
9013   /// Generate all the base pointers, section pointers, sizes, map types, and
9014   /// mappers for the extracted map clauses of user-defined mapper (all included
9015   /// in \a CombinedInfo).
9016   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9017     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9018            "Expect a declare mapper directive");
9019     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9020     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9021   }
9022 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// If \p VD is a lambda object, appends one PTR_AND_OBJ map entry for the
  /// captured 'this' (if any) and one for every variable captured by
  /// reference (or any captured pointer) to \a CombinedInfo. For each emitted
  /// entry, \a LambdaPointers records the mapping from the capture field's
  /// address to the lambda object's address so that
  /// adjustMemberOfForLambdaCaptures can later fix up the MEMBER_OF index.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    // Only lambda objects need this treatment.
    if (!RD || !RD->isLambda())
      return;
    // Build an l-value for the lambda object from the passed-in argument.
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    // Collect the closure's capture fields, including the 'this' capture.
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Map the captured 'this' pointer as an implicit PTR_AND_OBJ member of
      // the lambda object (pointer-sized entry).
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers are of interest.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced variable's storage using
        // its full (non-reference) type size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer: map the loaded pointer value itself with zero
        // size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
9087 
9088   /// Set correct indices for lambdas captures.
9089   void adjustMemberOfForLambdaCaptures(
9090       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9091       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9092       MapFlagsArrayTy &Types) const {
9093     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9094       // Set correct member_of idx for all implicit lambda captures.
9095       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9096                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9097         continue;
9098       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9099       assert(BasePtr && "Unable to find base lambda address.");
9100       int TgtIdx = -1;
9101       for (unsigned J = I; J > 0; --J) {
9102         unsigned Idx = J - 1;
9103         if (Pointers[Idx] != BasePtr)
9104           continue;
9105         TgtIdx = Idx;
9106         break;
9107       }
9108       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9109       // All other current entries will be MEMBER_OF the combined entry
9110       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9111       // 0xFFFF in the MEMBER_OF field).
9112       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9113       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9114     }
9115   }
9116 
9117   /// Generate the base pointers, section pointers, sizes, map types, and
9118   /// mappers associated to a given capture (all included in \a CombinedInfo).
9119   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9120                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9121                               StructRangeInfoTy &PartialStruct) const {
9122     assert(!Cap->capturesVariableArrayType() &&
9123            "Not expecting to generate map info for a variable array type!");
9124 
9125     // We need to know when we generating information for the first component
9126     const ValueDecl *VD = Cap->capturesThis()
9127                               ? nullptr
9128                               : Cap->getCapturedVar()->getCanonicalDecl();
9129 
9130     // for map(to: lambda): skip here, processing it in
9131     // generateDefaultMapInfo
9132     if (LambdasMap.count(VD))
9133       return;
9134 
9135     // If this declaration appears in a is_device_ptr clause we just have to
9136     // pass the pointer by value. If it is a reference to a declaration, we just
9137     // pass its value.
9138     if (DevPointersMap.count(VD)) {
9139       CombinedInfo.Exprs.push_back(VD);
9140       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9141       CombinedInfo.Pointers.push_back(Arg);
9142       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9143           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9144           /*isSigned=*/true));
9145       CombinedInfo.Types.push_back(
9146           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9147           OMP_MAP_TARGET_PARAM);
9148       CombinedInfo.Mappers.push_back(nullptr);
9149       return;
9150     }
9151 
9152     using MapData =
9153         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9154                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9155                    const ValueDecl *, const Expr *>;
9156     SmallVector<MapData, 4> DeclComponentLists;
9157     assert(CurDir.is<const OMPExecutableDirective *>() &&
9158            "Expect a executable directive");
9159     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9160     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9161       const auto *EI = C->getVarRefs().begin();
9162       for (const auto L : C->decl_component_lists(VD)) {
9163         const ValueDecl *VDecl, *Mapper;
9164         // The Expression is not correct if the mapping is implicit
9165         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9166         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9167         std::tie(VDecl, Components, Mapper) = L;
9168         assert(VDecl == VD && "We got information for the wrong declaration??");
9169         assert(!Components.empty() &&
9170                "Not expecting declaration with no component lists.");
9171         DeclComponentLists.emplace_back(Components, C->getMapType(),
9172                                         C->getMapTypeModifiers(),
9173                                         C->isImplicit(), Mapper, E);
9174         ++EI;
9175       }
9176     }
9177     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9178                                              const MapData &RHS) {
9179       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9180       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9181       bool HasPresent =
9182           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9183       bool HasAllocs = MapType == OMPC_MAP_alloc;
9184       MapModifiers = std::get<2>(RHS);
9185       MapType = std::get<1>(LHS);
9186       bool HasPresentR =
9187           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9188       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9189       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9190     });
9191 
9192     // Find overlapping elements (including the offset from the base element).
9193     llvm::SmallDenseMap<
9194         const MapData *,
9195         llvm::SmallVector<
9196             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9197         4>
9198         OverlappedData;
9199     size_t Count = 0;
9200     for (const MapData &L : DeclComponentLists) {
9201       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9202       OpenMPMapClauseKind MapType;
9203       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9204       bool IsImplicit;
9205       const ValueDecl *Mapper;
9206       const Expr *VarRef;
9207       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9208           L;
9209       ++Count;
9210       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9211         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9212         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9213                  VarRef) = L1;
9214         auto CI = Components.rbegin();
9215         auto CE = Components.rend();
9216         auto SI = Components1.rbegin();
9217         auto SE = Components1.rend();
9218         for (; CI != CE && SI != SE; ++CI, ++SI) {
9219           if (CI->getAssociatedExpression()->getStmtClass() !=
9220               SI->getAssociatedExpression()->getStmtClass())
9221             break;
9222           // Are we dealing with different variables/fields?
9223           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9224             break;
9225         }
9226         // Found overlapping if, at least for one component, reached the head
9227         // of the components list.
9228         if (CI == CE || SI == SE) {
9229           // Ignore it if it is the same component.
9230           if (CI == CE && SI == SE)
9231             continue;
9232           const auto It = (SI == SE) ? CI : SI;
9233           // If one component is a pointer and another one is a kind of
9234           // dereference of this pointer (array subscript, section, dereference,
9235           // etc.), it is not an overlapping.
9236           // Same, if one component is a base and another component is a
9237           // dereferenced pointer memberexpr with the same base.
9238           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9239               (std::prev(It)->getAssociatedDeclaration() &&
9240                std::prev(It)
9241                    ->getAssociatedDeclaration()
9242                    ->getType()
9243                    ->isPointerType()) ||
9244               (It->getAssociatedDeclaration() &&
9245                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9246                std::next(It) != CE && std::next(It) != SE))
9247             continue;
9248           const MapData &BaseData = CI == CE ? L : L1;
9249           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9250               SI == SE ? Components : Components1;
9251           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9252           OverlappedElements.getSecond().push_back(SubData);
9253         }
9254       }
9255     }
9256     // Sort the overlapped elements for each item.
9257     llvm::SmallVector<const FieldDecl *, 4> Layout;
9258     if (!OverlappedData.empty()) {
9259       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9260       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9261       while (BaseType != OrigType) {
9262         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9263         OrigType = BaseType->getPointeeOrArrayElementType();
9264       }
9265 
9266       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9267         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9268       else {
9269         const auto *RD = BaseType->getAsRecordDecl();
9270         Layout.append(RD->field_begin(), RD->field_end());
9271       }
9272     }
9273     for (auto &Pair : OverlappedData) {
9274       llvm::stable_sort(
9275           Pair.getSecond(),
9276           [&Layout](
9277               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9278               OMPClauseMappableExprCommon::MappableExprComponentListRef
9279                   Second) {
9280             auto CI = First.rbegin();
9281             auto CE = First.rend();
9282             auto SI = Second.rbegin();
9283             auto SE = Second.rend();
9284             for (; CI != CE && SI != SE; ++CI, ++SI) {
9285               if (CI->getAssociatedExpression()->getStmtClass() !=
9286                   SI->getAssociatedExpression()->getStmtClass())
9287                 break;
9288               // Are we dealing with different variables/fields?
9289               if (CI->getAssociatedDeclaration() !=
9290                   SI->getAssociatedDeclaration())
9291                 break;
9292             }
9293 
9294             // Lists contain the same elements.
9295             if (CI == CE && SI == SE)
9296               return false;
9297 
9298             // List with less elements is less than list with more elements.
9299             if (CI == CE || SI == SE)
9300               return CI == CE;
9301 
9302             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9303             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9304             if (FD1->getParent() == FD2->getParent())
9305               return FD1->getFieldIndex() < FD2->getFieldIndex();
9306             const auto *It =
9307                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9308                   return FD == FD1 || FD == FD2;
9309                 });
9310             return *It == FD1;
9311           });
9312     }
9313 
9314     // Associated with a capture, because the mapping flags depend on it.
9315     // Go through all of the elements with the overlapped elements.
9316     bool IsFirstComponentList = true;
9317     for (const auto &Pair : OverlappedData) {
9318       const MapData &L = *Pair.getFirst();
9319       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9320       OpenMPMapClauseKind MapType;
9321       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9322       bool IsImplicit;
9323       const ValueDecl *Mapper;
9324       const Expr *VarRef;
9325       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9326           L;
9327       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9328           OverlappedComponents = Pair.getSecond();
9329       generateInfoForComponentList(
9330           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9331           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9332           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9333       IsFirstComponentList = false;
9334     }
9335     // Go through other elements without overlapped elements.
9336     for (const MapData &L : DeclComponentLists) {
9337       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9338       OpenMPMapClauseKind MapType;
9339       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9340       bool IsImplicit;
9341       const ValueDecl *Mapper;
9342       const Expr *VarRef;
9343       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9344           L;
9345       auto It = OverlappedData.find(&L);
9346       if (It == OverlappedData.end())
9347         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9348                                      Components, CombinedInfo, PartialStruct,
9349                                      IsFirstComponentList, IsImplicit, Mapper,
9350                                      /*ForDeviceAddr=*/false, VD, VarRef);
9351       IsFirstComponentList = false;
9352     }
9353   }
9354 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Appends exactly one entry (expression, base pointer, pointer, size, map
  /// type, mapper) to \a CombinedInfo. The entry is always marked as a target
  /// parameter and, unless FirstPrivateDecls records otherwise for the
  /// captured variable, as an implicit map.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this' capture: map with the size of the pointed-to class, TOFROM.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // FirstPrivateDecls records whether this capture is reported as
      // implicit.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      // Captured by reference: the capture field has reference type and the
      // mapped size is that of the referenced element type.
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer: load through the reference so the pointer
        // value itself (not the reference slot) is passed.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9427 };
9428 } // anonymous namespace
9429 
/// Emit, for each non-contiguous mapped entry, a stack array of
/// 'descriptor_dim' records ({offset, count, stride} per dimension) and store
/// the array's address into the corresponding slot of the runtime pointers
/// array (\p Info.PointersArray). Entries whose dimension count is 1 are
/// skipped since they cannot be non-contiguous.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // The per-dimension values are read back-to-front (RevIdx), i.e. the
      // descriptor reverses the order in which Offsets/Counts/Strides were
      // collected.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}
9497 
9498 // Try to extract the base declaration from a `this->x` expression if possible.
9499 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9500   if (!E)
9501     return nullptr;
9502 
9503   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9504     if (const MemberExpr *ME =
9505             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9506       return ME->getMemberDecl();
9507   return nullptr;
9508 }
9509 
9510 /// Emit a string constant containing the names of the values mapped to the
9511 /// offloading runtime library.
9512 llvm::Constant *
9513 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9514                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9515 
9516   uint32_t SrcLocStrSize;
9517   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9518     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9519 
9520   SourceLocation Loc;
9521   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9522     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9523       Loc = VD->getLocation();
9524     else
9525       Loc = MapExprs.getMapExpr()->getExprLoc();
9526   } else {
9527     Loc = MapExprs.getMapDecl()->getLocation();
9528   }
9529 
9530   std::string ExprName;
9531   if (MapExprs.getMapExpr()) {
9532     PrintingPolicy P(CGF.getContext().getLangOpts());
9533     llvm::raw_string_ostream OS(ExprName);
9534     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9535     OS.flush();
9536   } else {
9537     ExprName = MapExprs.getMapDecl()->getNameAsString();
9538   }
9539 
9540   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9541   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9542                                          PLoc.getLine(), PLoc.getColumn(),
9543                                          SrcLocStrSize);
9544 }
9545 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Fills \p Info with the freshly emitted base-pointer, pointer, size,
/// map-type, map-name and mapper arrays built from \p CombinedInfo. Sizes
/// known at compile time are folded into a private constant global; sizes
/// requiring runtime evaluation are stored via the IR builder. When
/// \p IsNonContiguous is set and non-contiguous offsets were collected,
/// per-dimension descriptors are emitted as well.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    // Classify each size: plain constants (excluding constant expressions and
    // global values) can live in a constant array; everything else is marked
    // in RuntimeSizes and stored at runtime in the per-pointer loop below.
    SmallVector<llvm::Constant *> ConstSizes(
        CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
    llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
    for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
      if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
        if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
          // For non-contiguous entries the size slot carries the number of
          // dimensions instead of a byte size.
          if (IsNonContiguous && (CombinedInfo.Types[I] &
                                  MappableExprsHandler::OMP_MAP_NON_CONTIG))
            ConstSizes[I] = llvm::ConstantInt::get(
                CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
          else
            ConstSizes[I] = CI;
          continue;
        }
      }
      RuntimeSizes.set(I);
    }

    if (RuntimeSizes.all()) {
      // Every size is runtime-evaluated: a plain stack array suffices; it is
      // filled in the per-pointer loop below.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // At least some sizes are constants: emit them as a private constant
      // global.
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      if (RuntimeSizes.any()) {
        // Mixed constant/runtime sizes: copy the constant global into a stack
        // buffer so the runtime slots can be overwritten below.
        QualType SizeArrayType = Ctx.getConstantArrayType(
            Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
            /*IndexTypeQuals=*/0);
        Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
        llvm::Value *GblConstPtr =
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                SizesArrayGbl, CGM.Int64Ty->getPointerTo());
        CGF.Builder.CreateMemCpy(
            Buffer,
            Address(GblConstPtr, CGM.Int64Ty,
                    CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
                        /*DestWidth=*/64, /*Signed=*/false))),
            CGF.getTypeSize(SizeArrayType));
        Info.SizesArray = Buffer.getPointer();
      } else {
        // All sizes constant: the global itself is the sizes array.
        Info.SizesArray = SizesArrayGbl;
      }
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // Build one source-location/name string per mapped expression.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the per-entry slots of the base-pointer, pointer, (runtime) size
    // and mapper arrays.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, BPVal->getType(),
                     Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record the address of device-pointer captures when requested.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Only runtime-evaluated sizes need a store; constant ones were emitted
      // into the sizes global above.
      if (RuntimeSizes.test(I)) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9736 
namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// When set, emit the map types intended for the end of the region
  /// (Info.MapTypesArrayEnd) rather than the beginning.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool IsForEndCall) : ForEndCall(IsForEndCall) {}
};
} // namespace
9745 
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers.  If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
///
/// Each out-parameter receives either a GEP to the first element of the
/// corresponding array recorded in \p Info, or a typed null pointer when
/// \p Info holds no pointers at all.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // Use the end-of-region map types when they were emitted separately
    // (present modifiers stripped) and this is the end call.
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the mapper information arrays if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // No mapped data at all: pass typed null pointers for every argument.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}
9806 
/// Check for inner distribute directive.
///
/// Returns the distribute directive nested as the single child of \p D's
/// innermost captured statement — looking through one extra 'teams' level
/// when \p D is a plain 'target' directive — or nullptr if there is none.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // Only a body that reduces to a single (possibly compound-wrapped)
  // statement can carry a nested directive.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        // 'target teams' spelled as nested directives: look one level deeper
        // through the 'teams' region for the distribute directive.
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    // Combined target forms that cannot contain a nested distribute.
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    // All remaining directive kinds are not valid values for \p D here.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9916 
9917 /// Emit the user-defined mapper function. The code generation follows the
9918 /// pattern in the example below.
9919 /// \code
9920 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9921 ///                                           void *base, void *begin,
9922 ///                                           int64_t size, int64_t type,
9923 ///                                           void *name = nullptr) {
9924 ///   // Allocate space for an array section first or add a base/begin for
9925 ///   // pointer dereference.
9926 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9927 ///       !maptype.IsDelete)
9928 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9929 ///                                 size*sizeof(Ty), clearToFromMember(type));
9930 ///   // Map members.
9931 ///   for (unsigned i = 0; i < size; i++) {
9932 ///     // For each component specified by this mapper:
9933 ///     for (auto c : begin[i]->all_components) {
9934 ///       if (c.hasMapper())
9935 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9936 ///                       c.arg_type, c.arg_name);
9937 ///       else
9938 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9939 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9940 ///                                     c.arg_name);
9941 ///     }
9942 ///   }
9943 ///   // Delete the array section.
9944 ///   if (size > 1 && maptype.IsDelete)
9945 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9946 ///                                 size*sizeof(Ty), clearToFromMember(type));
9947 /// }
9948 /// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper is emitted at most once; UDMMap caches already-emitted ones.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable declared by the 'omp declare mapper' construct; it is
  // privatized below so the map clauses inside the mapper refer to the
  // current array element.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes. The signature matches
  // the pseudo code in the documentation above:
  // (rt_mapper_handle, base, begin, size, type, name).
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function "omp_mapper.<mangled type>.<mapper name>" so mappers
  // for distinct types never collide.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Allow the mapper body to be optimized even at -O0.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  // PHI over the current element pointer; the back-edge incoming value is
  // added below once the loop latch block is known.
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the count into the MEMBER_OF field position so member entries
  // emitted below are attributed to the correct parent entry.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Mapping names are only materialized when debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four possible map types; the tofrom case arrives through the
    // ToElseBB -> EndBB edge with MemberMapType unmodified.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the generated function; when emitted while inside another function,
  // also record the declaration against that function for later bookkeeping.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
10196 
10197 /// Emit the array initialization or deletion portion for user-defined mapper
10198 /// code generation. First, it evaluates whether an array section is mapped and
10199 /// whether the \a MapType instructs to delete this section. If \a IsInit is
10200 /// true, and \a MapType indicates to not delete this array, array
10201 /// initialization code is generated. If \a IsInit is false, and \a MapType
10202 /// indicates to not this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // Init runs when (size > 1 || (base != begin && IsPtrAndObj)) and the
    // delete bit is NOT set — mirrors the pseudo code in the
    // emitUserDefinedMapper documentation.
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // Deletion runs when size > 1 and the delete bit IS set.
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Mark the entry implicit since it is synthesized by the compiler rather
  // than written by the user.
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
10264 
10265 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10266     const OMPDeclareMapperDecl *D) {
10267   auto I = UDMMap.find(D);
10268   if (I != UDMMap.end())
10269     return I->second;
10270   emitUserDefinedMapper(D);
10271   return UDMMap.lookup(D);
10272 }
10273 
10274 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10275     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10276     llvm::Value *DeviceID,
10277     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10278                                      const OMPLoopDirective &D)>
10279         SizeEmitter) {
10280   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10281   const OMPExecutableDirective *TD = &D;
10282   // Get nested teams distribute kind directive, if any.
10283   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10284     TD = getNestedDistributeDirective(CGM.getContext(), D);
10285   if (!TD)
10286     return;
10287   const auto *LD = cast<OMPLoopDirective>(TD);
10288   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10289                                                          PrePostActionTy &) {
10290     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10291       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10292       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10293       CGF.EmitRuntimeCall(
10294           OMPBuilder.getOrCreateRuntimeFunction(
10295               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10296           Args);
10297     }
10298   };
10299   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10300 }
10301 
10302 void CGOpenMPRuntime::emitTargetCall(
10303     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10304     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10305     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10306     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10307                                      const OMPLoopDirective &D)>
10308         SizeEmitter) {
10309   if (!CGF.HaveInsertPoint())
10310     return;
10311 
10312   const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
10313                                    CGM.getLangOpts().OpenMPOffloadMandatory;
10314 
10315   assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
10316 
10317   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10318                                  D.hasClausesOfKind<OMPNowaitClause>();
10319   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10320   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10321   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10322                                             PrePostActionTy &) {
10323     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10324   };
10325   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10326 
10327   CodeGenFunction::OMPTargetDataInfo InputInfo;
10328   llvm::Value *MapTypesArray = nullptr;
10329   llvm::Value *MapNamesArray = nullptr;
10330   // Generate code for the host fallback function.
10331   auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
10332                         &CS, OffloadingMandatory](CodeGenFunction &CGF) {
10333     if (OffloadingMandatory) {
10334       CGF.Builder.CreateUnreachable();
10335     } else {
10336       if (RequiresOuterTask) {
10337         CapturedVars.clear();
10338         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10339       }
10340       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10341     }
10342   };
10343   // Fill up the pointer arrays and transfer execution to the device.
10344   auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
10345                     &MapNamesArray, SizeEmitter,
10346                     FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
10347     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10348       // Reverse offloading is not supported, so just execute on the host.
10349       FallbackGen(CGF);
10350       return;
10351     }
10352 
10353     // On top of the arrays that were filled up, the target offloading call
10354     // takes as arguments the device id as well as the host pointer. The host
10355     // pointer is used by the runtime library to identify the current target
10356     // region, so it only has to be unique and not necessarily point to
10357     // anything. It could be the pointer to the outlined function that
10358     // implements the target region, but we aren't using that so that the
10359     // compiler doesn't need to keep that, and could therefore inline the host
10360     // function if proven worthwhile during optimization.
10361 
10362     // From this point on, we need to have an ID of the target region defined.
10363     assert(OutlinedFnID && "Invalid outlined function ID!");
10364     (void)OutlinedFnID;
10365 
10366     // Emit device ID if any.
10367     llvm::Value *DeviceID;
10368     if (Device.getPointer()) {
10369       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10370               Device.getInt() == OMPC_DEVICE_device_num) &&
10371              "Expected device_num modifier.");
10372       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10373       DeviceID =
10374           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10375     } else {
10376       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10377     }
10378 
10379     // Emit the number of elements in the offloading arrays.
10380     llvm::Value *PointerNum =
10381         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10382 
10383     // Return value of the runtime offloading call.
10384     llvm::Value *Return;
10385 
10386     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10387     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10388 
10389     // Source location for the ident struct
10390     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10391 
10392     // Emit tripcount for the target loop-based directive.
10393     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10394 
10395     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10396     // The target region is an outlined function launched by the runtime
10397     // via calls __tgt_target() or __tgt_target_teams().
10398     //
10399     // __tgt_target() launches a target region with one team and one thread,
10400     // executing a serial region.  This master thread may in turn launch
10401     // more threads within its team upon encountering a parallel region,
10402     // however, no additional teams can be launched on the device.
10403     //
10404     // __tgt_target_teams() launches a target region with one or more teams,
10405     // each with one or more threads.  This call is required for target
10406     // constructs such as:
10407     //  'target teams'
10408     //  'target' / 'teams'
10409     //  'target teams distribute parallel for'
10410     //  'target parallel'
10411     // and so on.
10412     //
10413     // Note that on the host and CPU targets, the runtime implementation of
10414     // these calls simply call the outlined function without forking threads.
10415     // The outlined functions themselves have runtime calls to
10416     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10417     // the compiler in emitTeamsCall() and emitParallelCall().
10418     //
10419     // In contrast, on the NVPTX target, the implementation of
10420     // __tgt_target_teams() launches a GPU kernel with the requested number
10421     // of teams and threads so no additional calls to the runtime are required.
10422     if (NumTeams) {
10423       // If we have NumTeams defined this means that we have an enclosed teams
10424       // region. Therefore we also expect to have NumThreads defined. These two
10425       // values should be defined in the presence of a teams directive,
10426       // regardless of having any clauses associated. If the user is using teams
10427       // but no clauses, these two values will be the default that should be
10428       // passed to the runtime library - a 32-bit integer with the value zero.
10429       assert(NumThreads && "Thread limit expression should be available along "
10430                            "with number of teams.");
10431       SmallVector<llvm::Value *> OffloadingArgs = {
10432           RTLoc,
10433           DeviceID,
10434           OutlinedFnID,
10435           PointerNum,
10436           InputInfo.BasePointersArray.getPointer(),
10437           InputInfo.PointersArray.getPointer(),
10438           InputInfo.SizesArray.getPointer(),
10439           MapTypesArray,
10440           MapNamesArray,
10441           InputInfo.MappersArray.getPointer(),
10442           NumTeams,
10443           NumThreads};
10444       if (HasNowait) {
10445         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10446         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10447         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10448         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10449         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10450         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10451       }
10452       Return = CGF.EmitRuntimeCall(
10453           OMPBuilder.getOrCreateRuntimeFunction(
10454               CGM.getModule(), HasNowait
10455                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10456                                    : OMPRTL___tgt_target_teams_mapper),
10457           OffloadingArgs);
10458     } else {
10459       SmallVector<llvm::Value *> OffloadingArgs = {
10460           RTLoc,
10461           DeviceID,
10462           OutlinedFnID,
10463           PointerNum,
10464           InputInfo.BasePointersArray.getPointer(),
10465           InputInfo.PointersArray.getPointer(),
10466           InputInfo.SizesArray.getPointer(),
10467           MapTypesArray,
10468           MapNamesArray,
10469           InputInfo.MappersArray.getPointer()};
10470       if (HasNowait) {
10471         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10472         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10473         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10474         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10475         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10476         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10477       }
10478       Return = CGF.EmitRuntimeCall(
10479           OMPBuilder.getOrCreateRuntimeFunction(
10480               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10481                                          : OMPRTL___tgt_target_mapper),
10482           OffloadingArgs);
10483     }
10484 
10485     // Check the error code and execute the host version if required.
10486     llvm::BasicBlock *OffloadFailedBlock =
10487         CGF.createBasicBlock("omp_offload.failed");
10488     llvm::BasicBlock *OffloadContBlock =
10489         CGF.createBasicBlock("omp_offload.cont");
10490     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10491     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10492 
10493     CGF.EmitBlock(OffloadFailedBlock);
10494     FallbackGen(CGF);
10495 
10496     CGF.EmitBranch(OffloadContBlock);
10497 
10498     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10499   };
10500 
10501   // Notify that the host version must be executed.
10502   auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
10503     FallbackGen(CGF);
10504   };
10505 
10506   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10507                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10508                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10509     // Fill up the arrays with all the captured variables.
10510     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10511 
10512     // Get mappable expression information.
10513     MappableExprsHandler MEHandler(D, CGF);
10514     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10515     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10516 
10517     auto RI = CS.getCapturedRecordDecl()->field_begin();
10518     auto *CV = CapturedVars.begin();
10519     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10520                                               CE = CS.capture_end();
10521          CI != CE; ++CI, ++RI, ++CV) {
10522       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10523       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10524 
10525       // VLA sizes are passed to the outlined region by copy and do not have map
10526       // information associated.
10527       if (CI->capturesVariableArrayType()) {
10528         CurInfo.Exprs.push_back(nullptr);
10529         CurInfo.BasePointers.push_back(*CV);
10530         CurInfo.Pointers.push_back(*CV);
10531         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10532             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10533         // Copy to the device as an argument. No need to retrieve it.
10534         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10535                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10536                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10537         CurInfo.Mappers.push_back(nullptr);
10538       } else {
10539         // If we have any information in the map clause, we use it, otherwise we
10540         // just do a default mapping.
10541         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10542         if (!CI->capturesThis())
10543           MappedVarSet.insert(CI->getCapturedVar());
10544         else
10545           MappedVarSet.insert(nullptr);
10546         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10547           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10548         // Generate correct mapping for variables captured by reference in
10549         // lambdas.
10550         if (CI->capturesVariable())
10551           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10552                                                   CurInfo, LambdaPointers);
10553       }
10554       // We expect to have at least an element of information for this capture.
10555       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10556              "Non-existing map pointer for capture!");
10557       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10558              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10559              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10560              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10561              "Inconsistent map information sizes!");
10562 
10563       // If there is an entry in PartialStruct it means we have a struct with
10564       // individual members mapped. Emit an extra combined entry.
10565       if (PartialStruct.Base.isValid()) {
10566         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10567         MEHandler.emitCombinedEntry(
10568             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10569             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10570       }
10571 
10572       // We need to append the results of this capture to what we already have.
10573       CombinedInfo.append(CurInfo);
10574     }
10575     // Adjust MEMBER_OF flags for the lambdas captures.
10576     MEHandler.adjustMemberOfForLambdaCaptures(
10577         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10578         CombinedInfo.Types);
10579     // Map any list items in a map clause that were not captures because they
10580     // weren't referenced within the construct.
10581     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10582 
10583     TargetDataInfo Info;
10584     // Fill up the arrays and create the arguments.
10585     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10586     emitOffloadingArraysArgument(
10587         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10588         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10589         {/*ForEndCall=*/false});
10590 
10591     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10592     InputInfo.BasePointersArray =
10593         Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10594     InputInfo.PointersArray =
10595         Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10596     InputInfo.SizesArray =
10597         Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10598     InputInfo.MappersArray =
10599         Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10600     MapTypesArray = Info.MapTypesArray;
10601     MapNamesArray = Info.MapNamesArray;
10602     if (RequiresOuterTask)
10603       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10604     else
10605       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10606   };
10607 
10608   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10609                              CodeGenFunction &CGF, PrePostActionTy &) {
10610     if (RequiresOuterTask) {
10611       CodeGenFunction::OMPTargetDataInfo InputInfo;
10612       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10613     } else {
10614       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10615     }
10616   };
10617 
10618   // If we have a target function ID it means that we need to support
10619   // offloading, otherwise, just execute on the host. We need to execute on host
10620   // regardless of the conditional in the if clause if, e.g., the user do not
10621   // specify target triples.
10622   if (OutlinedFnID) {
10623     if (IfCond) {
10624       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10625     } else {
10626       RegionCodeGenTy ThenRCG(TargetThenGen);
10627       ThenRCG(CGF);
10628     }
10629   } else {
10630     RegionCodeGenTy ElseRCG(TargetElseGen);
10631     ElseRCG(CGF);
10632   }
10633 }
10634 
/// Recursively scans \p S for OpenMP target execution directives and emits
/// the device function for each target region found. \p ParentName is the
/// mangled name of the enclosing host function and participates in the unique
/// kernel name/offload entry identification.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    // The (device-id, file-id, parent name, line) tuple uniquely identifies
    // this target region in the offload entries table.
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the dedicated device-function emitter for this target
    // execution directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target execution directives and
    // cannot reach here: RequiresDeviceCodegen already filtered them out.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // For any other executable directive, keep scanning inside its raw
  // associated statement (if it has one).
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10785 
10786 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10787   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10788       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10789   if (!DevTy)
10790     return false;
10791   // Do not emit device_type(nohost) functions for the host.
10792   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10793     return true;
10794   // Do not emit device_type(host) functions for the device.
10795   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10796     return true;
10797   return false;
10798 }
10799 
10800 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10801   // If emitting code for the host, we do not process FD here. Instead we do
10802   // the normal code generation.
10803   if (!CGM.getLangOpts().OpenMPIsDevice) {
10804     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10805       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10806                                   CGM.getLangOpts().OpenMPIsDevice))
10807         return true;
10808     return false;
10809   }
10810 
10811   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10812   // Try to detect target regions in the function.
10813   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10814     StringRef Name = CGM.getMangledName(GD);
10815     scanForTargetRegionsFunctions(FD->getBody(), Name);
10816     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10817                                 CGM.getLangOpts().OpenMPIsDevice))
10818       return true;
10819   }
10820 
10821   // Do not to emit function if it is not marked as declare target.
10822   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10823          AlreadyEmittedTargetDecls.count(VD) == 0;
10824 }
10825 
/// Decides whether the global variable \p GD must be skipped by the regular
/// codegen path. Returns true when the variable should not be emitted here
/// (excluded by device_type, or its emission is deferred).
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  // Skip device-only variables on the host and host-only variables on the
  // device.
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  // On the host the variable is emitted through the regular pipeline.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not to emit variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    // 'link' variables (and 'to' variables under unified shared memory) are
    // handled later by emitDeferredTargetDecls().
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}
10863 
/// Registers the declare-target variable \p VD (at address \p Addr) in the
/// offload entries table, or records it as a non-target variable emitted in
/// device code.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register when no offloading targets exist and we are not
  // compiling for a device.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration-only entries carry no size.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        // Create an internal constant holding the variable's address and keep
        // it alive through optimizations via llvm.compiler.used.
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device the entry is identified by name only.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      // On the host the entry refers to the declare-target indirection
      // variable rather than the variable itself.
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
10945 
10946 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10947   if (isa<FunctionDecl>(GD.getDecl()) ||
10948       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10949     return emitTargetFunctions(GD);
10950 
10951   return emitTargetGlobalVariable(GD);
10952 }
10953 
10954 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10955   for (const VarDecl *VD : DeferredGlobalVariables) {
10956     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10957         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10958     if (!Res)
10959       continue;
10960     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10961         !HasRequiresUnifiedSharedMemory) {
10962       CGM.EmitGlobal(VD);
10963     } else {
10964       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10965               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10966                HasRequiresUnifiedSharedMemory)) &&
10967              "Expected link clause or to clause with unified memory.");
10968       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10969     }
10970   }
10971 }
10972 
10973 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10974     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10975   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10976          " Expected target-based directive.");
10977 }
10978 
10979 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10980   for (const OMPClause *Clause : D->clauselists()) {
10981     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10982       HasRequiresUnifiedSharedMemory = true;
10983     } else if (const auto *AC =
10984                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10985       switch (AC->getAtomicDefaultMemOrderKind()) {
10986       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10987         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10988         break;
10989       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10990         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10991         break;
10992       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10993         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10994         break;
10995       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10996         break;
10997       }
10998     }
10999   }
11000 }
11001 
/// Returns the default atomic ordering recorded by processRequiresDirective()
/// from a 'requires atomic_default_mem_order' clause.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
11005 
11006 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11007                                                        LangAS &AS) {
11008   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11009     return false;
11010   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11011   switch(A->getAllocatorType()) {
11012   case OMPAllocateDeclAttr::OMPNullMemAlloc:
11013   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11014   // Not supported, fallback to the default mem space.
11015   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11016   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11017   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11018   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11019   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11020   case OMPAllocateDeclAttr::OMPConstMemAlloc:
11021   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11022     AS = LangAS::Default;
11023     return true;
11024   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11025     llvm_unreachable("Expected predefined allocator for the variables with the "
11026                      "static storage.");
11027   }
11028   return false;
11029 }
11030 
/// Returns true if a 'requires unified_shared_memory' clause was seen (set by
/// processRequiresDirective()).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
11034 
/// Saves and clears ShouldMarkAsGlobal for the lifetime of this RAII object,
/// suppressing automatic declare-target marking during device compilation.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  // The flag is only consulted for device compilation; on the host there is
  // nothing to save.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}
11043 
/// Restores the ShouldMarkAsGlobal value saved by the constructor.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
11048 
/// Returns true if the function \p GD is already emitted (or must be treated
/// as such) for device compilation, recording first-time declarations in
/// AlreadyEmittedTargetDecls as a side effect.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // Marking only applies to device compilation and only while automatic
  // marking is enabled (see DisableAutoDeclareTargetRAII).
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not to emit function if it is marked as declare target as it was already
  // emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // A body exists but is not yet recorded as emitted: consider it emitted
      // only if the module already holds a non-declaration llvm::Function.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // Not declare target: record it; insert() returns false on a repeat visit,
  // which means the function was already handled.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}
11068 
/// Creates the global-init-style function that registers the 'requires'
/// clauses with the offload runtime via __tgt_register_requires. Returns
/// nullptr when no registration is needed (no offload targets, simd-only
/// mode, device compilation, or no target entries at all).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Emit the call __tgt_register_requires(Flags) in the new function body.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
11110 
11111 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11112                                     const OMPExecutableDirective &D,
11113                                     SourceLocation Loc,
11114                                     llvm::Function *OutlinedFn,
11115                                     ArrayRef<llvm::Value *> CapturedVars) {
11116   if (!CGF.HaveInsertPoint())
11117     return;
11118 
11119   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11120   CodeGenFunction::RunCleanupsScope Scope(CGF);
11121 
11122   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11123   llvm::Value *Args[] = {
11124       RTLoc,
11125       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11126       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11127   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11128   RealArgs.append(std::begin(Args), std::end(Args));
11129   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11130 
11131   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11132       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11133   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11134 }
11135 
/// Emits a call to __kmpc_push_num_teams with the evaluated num_teams and
/// thread_limit clause values. A missing clause is passed as 0.
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // Evaluate the clause expressions (when present) and truncate/extend them
  // to the i32 the runtime expects; 0 signals an absent clause.
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
11164 
11165 void CGOpenMPRuntime::emitTargetDataCalls(
11166     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11167     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11168   if (!CGF.HaveInsertPoint())
11169     return;
11170 
11171   // Action used to replace the default codegen action and turn privatization
11172   // off.
11173   PrePostActionTy NoPrivAction;
11174 
11175   // Generate the code for the opening of the data environment. Capture all the
11176   // arguments of the runtime call by reference because they are used in the
11177   // closing of the region.
11178   auto &&BeginThenGen = [this, &D, Device, &Info,
11179                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11180     // Fill up the arrays with all the mapped variables.
11181     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11182 
11183     // Get map clause information.
11184     MappableExprsHandler MEHandler(D, CGF);
11185     MEHandler.generateAllInfo(CombinedInfo);
11186 
11187     // Fill up the arrays and create the arguments.
11188     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11189                          /*IsNonContiguous=*/true);
11190 
11191     llvm::Value *BasePointersArrayArg = nullptr;
11192     llvm::Value *PointersArrayArg = nullptr;
11193     llvm::Value *SizesArrayArg = nullptr;
11194     llvm::Value *MapTypesArrayArg = nullptr;
11195     llvm::Value *MapNamesArrayArg = nullptr;
11196     llvm::Value *MappersArrayArg = nullptr;
11197     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11198                                  SizesArrayArg, MapTypesArrayArg,
11199                                  MapNamesArrayArg, MappersArrayArg, Info);
11200 
11201     // Emit device ID if any.
11202     llvm::Value *DeviceID = nullptr;
11203     if (Device) {
11204       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11205                                            CGF.Int64Ty, /*isSigned=*/true);
11206     } else {
11207       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11208     }
11209 
11210     // Emit the number of elements in the offloading arrays.
11211     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11212     //
11213     // Source location for the ident struct
11214     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11215 
11216     llvm::Value *OffloadingArgs[] = {RTLoc,
11217                                      DeviceID,
11218                                      PointerNum,
11219                                      BasePointersArrayArg,
11220                                      PointersArrayArg,
11221                                      SizesArrayArg,
11222                                      MapTypesArrayArg,
11223                                      MapNamesArrayArg,
11224                                      MappersArrayArg};
11225     CGF.EmitRuntimeCall(
11226         OMPBuilder.getOrCreateRuntimeFunction(
11227             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11228         OffloadingArgs);
11229 
11230     // If device pointer privatization is required, emit the body of the region
11231     // here. It will have to be duplicated: with and without privatization.
11232     if (!Info.CaptureDeviceAddrMap.empty())
11233       CodeGen(CGF);
11234   };
11235 
11236   // Generate code for the closing of the data region.
11237   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11238                                                 PrePostActionTy &) {
11239     assert(Info.isValid() && "Invalid data environment closing arguments.");
11240 
11241     llvm::Value *BasePointersArrayArg = nullptr;
11242     llvm::Value *PointersArrayArg = nullptr;
11243     llvm::Value *SizesArrayArg = nullptr;
11244     llvm::Value *MapTypesArrayArg = nullptr;
11245     llvm::Value *MapNamesArrayArg = nullptr;
11246     llvm::Value *MappersArrayArg = nullptr;
11247     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11248                                  SizesArrayArg, MapTypesArrayArg,
11249                                  MapNamesArrayArg, MappersArrayArg, Info,
11250                                  {/*ForEndCall=*/true});
11251 
11252     // Emit device ID if any.
11253     llvm::Value *DeviceID = nullptr;
11254     if (Device) {
11255       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11256                                            CGF.Int64Ty, /*isSigned=*/true);
11257     } else {
11258       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11259     }
11260 
11261     // Emit the number of elements in the offloading arrays.
11262     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11263 
11264     // Source location for the ident struct
11265     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11266 
11267     llvm::Value *OffloadingArgs[] = {RTLoc,
11268                                      DeviceID,
11269                                      PointerNum,
11270                                      BasePointersArrayArg,
11271                                      PointersArrayArg,
11272                                      SizesArrayArg,
11273                                      MapTypesArrayArg,
11274                                      MapNamesArrayArg,
11275                                      MappersArrayArg};
11276     CGF.EmitRuntimeCall(
11277         OMPBuilder.getOrCreateRuntimeFunction(
11278             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11279         OffloadingArgs);
11280   };
11281 
11282   // If we need device pointer privatization, we need to emit the body of the
11283   // region with no privatization in the 'else' branch of the conditional.
11284   // Otherwise, we don't have to do anything.
11285   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11286                                                          PrePostActionTy &) {
11287     if (!Info.CaptureDeviceAddrMap.empty()) {
11288       CodeGen.setAction(NoPrivAction);
11289       CodeGen(CGF);
11290     }
11291   };
11292 
11293   // We don't have to do anything to close the region if the if clause evaluates
11294   // to false.
11295   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11296 
11297   if (IfCond) {
11298     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11299   } else {
11300     RegionCodeGenTy RCG(BeginThenGen);
11301     RCG(CGF);
11302   }
11303 
11304   // If we don't require privatization of device pointers, we emit the body in
11305   // between the runtime calls. This avoids duplicating the body code.
11306   if (Info.CaptureDeviceAddrMap.empty()) {
11307     CodeGen.setAction(NoPrivAction);
11308     CodeGen(CGF);
11309   }
11310 
11311   if (IfCond) {
11312     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11313   } else {
11314     RegionCodeGenTy RCG(EndThenGen);
11315     RCG(CGF);
11316   }
11317 }
11318 
// Emits a standalone target data directive ('target enter data',
// 'target exit data' or 'target update'): materializes the offloading
// arrays from the map clauses and issues the matching
// __tgt_target_data_{begin,end,update}[_nowait]_mapper runtime call,
// optionally guarded by the 'if' clause and routed through a task when
// 'depend' or 'nowait' clauses are present.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  // Nothing to emit if the current insertion point is unreachable.
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // State filled in by TargetThenGen (below) and read by ThenGen. ThenGen may
  // be emitted inside a separate task function, hence the reference captures.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause: let the runtime choose the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Argument order must match the __tgt_target_data_*_mapper entry points.
    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All remaining directive kinds are rejected by the assertion at the top
    // of this function and must never reach this switch.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Builds the offloading arrays for the map clauses, publishes their
  // addresses through InputInfo/MapTypesArray/MapNamesArray, and then emits
  // ThenGen either inline or wrapped in a task (for depend/nowait).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    // depend/nowait clauses require the runtime call to be issued from a
    // separate task so the dependencies can be honored.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an 'if' clause, guard the whole thing; the 'else' branch emits
  // nothing since the directive is a no-op when the condition is false.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11499 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    // Classification of the parameter; defaults to Vector when no clause
    // mentions it.
    ParamKindTy Kind = Vector;
    // For Linear: the (possibly rescaled) constant step. For
    // LinearWithVarStride: the position of the parameter that holds the
    // stride value.
    llvm::APSInt StrideOrArg;
    // Alignment from the 'aligned' clause; zero-valued when not specified.
    llvm::APSInt Alignment;
  };
} // namespace
11510 
11511 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11512                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11513   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11514   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11515   // of that clause. The VLEN value must be power of 2.
11516   // In other case the notion of the function`s "characteristic data type" (CDT)
11517   // is used to compute the vector length.
11518   // CDT is defined in the following order:
11519   //   a) For non-void function, the CDT is the return type.
11520   //   b) If the function has any non-uniform, non-linear parameters, then the
11521   //   CDT is the type of the first such parameter.
11522   //   c) If the CDT determined by a) or b) above is struct, union, or class
11523   //   type which is pass-by-value (except for the type that maps to the
11524   //   built-in complex data type), the characteristic data type is int.
11525   //   d) If none of the above three cases is applicable, the CDT is int.
11526   // The VLEN is then determined based on the CDT and the size of vector
11527   // register of that ISA for which current vector version is generated. The
11528   // VLEN is computed using the formula below:
11529   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11530   // where vector register size specified in section 3.2.1 Registers and the
11531   // Stack Frame of original AMD64 ABI document.
11532   QualType RetType = FD->getReturnType();
11533   if (RetType.isNull())
11534     return 0;
11535   ASTContext &C = FD->getASTContext();
11536   QualType CDT;
11537   if (!RetType.isNull() && !RetType->isVoidType()) {
11538     CDT = RetType;
11539   } else {
11540     unsigned Offset = 0;
11541     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11542       if (ParamAttrs[Offset].Kind == Vector)
11543         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11544       ++Offset;
11545     }
11546     if (CDT.isNull()) {
11547       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11548         if (ParamAttrs[I + Offset].Kind == Vector) {
11549           CDT = FD->getParamDecl(I)->getType();
11550           break;
11551         }
11552       }
11553     }
11554   }
11555   if (CDT.isNull())
11556     CDT = C.IntTy;
11557   CDT = CDT->getCanonicalTypeUnqualified();
11558   if (CDT->isRecordType() || CDT->isUnionType())
11559     CDT = C.IntTy;
11560   return C.getTypeSize(CDT);
11561 }
11562 
11563 static void
11564 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11565                            const llvm::APSInt &VLENVal,
11566                            ArrayRef<ParamAttrTy> ParamAttrs,
11567                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11568   struct ISADataTy {
11569     char ISA;
11570     unsigned VecRegSize;
11571   };
11572   ISADataTy ISAData[] = {
11573       {
11574           'b', 128
11575       }, // SSE
11576       {
11577           'c', 256
11578       }, // AVX
11579       {
11580           'd', 256
11581       }, // AVX2
11582       {
11583           'e', 512
11584       }, // AVX512
11585   };
11586   llvm::SmallVector<char, 2> Masked;
11587   switch (State) {
11588   case OMPDeclareSimdDeclAttr::BS_Undefined:
11589     Masked.push_back('N');
11590     Masked.push_back('M');
11591     break;
11592   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11593     Masked.push_back('N');
11594     break;
11595   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11596     Masked.push_back('M');
11597     break;
11598   }
11599   for (char Mask : Masked) {
11600     for (const ISADataTy &Data : ISAData) {
11601       SmallString<256> Buffer;
11602       llvm::raw_svector_ostream Out(Buffer);
11603       Out << "_ZGV" << Data.ISA << Mask;
11604       if (!VLENVal) {
11605         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11606         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11607         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11608       } else {
11609         Out << VLENVal;
11610       }
11611       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11612         switch (ParamAttr.Kind){
11613         case LinearWithVarStride:
11614           Out << 's' << ParamAttr.StrideOrArg;
11615           break;
11616         case Linear:
11617           Out << 'l';
11618           if (ParamAttr.StrideOrArg != 1)
11619             Out << ParamAttr.StrideOrArg;
11620           break;
11621         case Uniform:
11622           Out << 'u';
11623           break;
11624         case Vector:
11625           Out << 'v';
11626           break;
11627         }
11628         if (!!ParamAttr.Alignment)
11629           Out << 'a' << ParamAttr.Alignment;
11630       }
11631       Out << '_' << Fn->getName();
11632       Fn->addFnAttr(Out.str());
11633     }
11634   }
11635 }
11636 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available
// at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11642 
11643 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11644 ///
11645 /// TODO: Need to implement the behavior for reference marked with a
11646 /// var or no linear modifiers (1.b in the section). For this, we
11647 /// need to extend ParamKindTy to support the linear modifiers.
11648 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11649   QT = QT.getCanonicalType();
11650 
11651   if (QT->isVoidType())
11652     return false;
11653 
11654   if (Kind == ParamKindTy::Uniform)
11655     return false;
11656 
11657   if (Kind == ParamKindTy::Linear)
11658     return false;
11659 
11660   // TODO: Handle linear references with modifiers
11661 
11662   if (Kind == ParamKindTy::LinearWithVarStride)
11663     return false;
11664 
11665   return true;
11666 }
11667 
11668 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11669 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11670   QT = QT.getCanonicalType();
11671   unsigned Size = C.getTypeSize(QT);
11672 
11673   // Only scalars and complex within 16 bytes wide set PVB to true.
11674   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11675     return false;
11676 
11677   if (QT->isFloatingType())
11678     return true;
11679 
11680   if (QT->isIntegerType())
11681     return true;
11682 
11683   if (QT->isPointerType())
11684     return true;
11685 
11686   // TODO: Add support for complex types (section 3.1.2, item 2).
11687 
11688   return false;
11689 }
11690 
11691 /// Computes the lane size (LS) of a return type or of an input parameter,
11692 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11693 /// TODO: Add support for references, section 3.2.1, item 1.
11694 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11695   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11696     QualType PTy = QT.getCanonicalType()->getPointeeType();
11697     if (getAArch64PBV(PTy, C))
11698       return C.getTypeSize(PTy);
11699   }
11700   if (getAArch64PBV(QT, C))
11701     return C.getTypeSize(QT);
11702 
11703   return C.getTypeSize(C.getUIntPtrType());
11704 }
11705 
11706 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11707 // signature of the scalar function, as defined in 3.2.2 of the
11708 // AAVFABI.
11709 static std::tuple<unsigned, unsigned, bool>
11710 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11711   QualType RetType = FD->getReturnType().getCanonicalType();
11712 
11713   ASTContext &C = FD->getASTContext();
11714 
11715   bool OutputBecomesInput = false;
11716 
11717   llvm::SmallVector<unsigned, 8> Sizes;
11718   if (!RetType->isVoidType()) {
11719     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11720     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11721       OutputBecomesInput = true;
11722   }
11723   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11724     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11725     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11726   }
11727 
11728   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11729   // The LS of a function parameter / return value can only be a power
11730   // of 2, starting from 8 bits, up to 128.
11731   assert(llvm::all_of(Sizes,
11732                       [](unsigned Size) {
11733                         return Size == 8 || Size == 16 || Size == 32 ||
11734                                Size == 64 || Size == 128;
11735                       }) &&
11736          "Invalid size");
11737 
11738   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11739                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11740                          OutputBecomesInput);
11741 }
11742 
11743 /// Mangle the parameter part of the vector function name according to
11744 /// their OpenMP classification. The mangling function is defined in
11745 /// section 3.5 of the AAVFABI.
11746 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11747   SmallString<256> Buffer;
11748   llvm::raw_svector_ostream Out(Buffer);
11749   for (const auto &ParamAttr : ParamAttrs) {
11750     switch (ParamAttr.Kind) {
11751     case LinearWithVarStride:
11752       Out << "ls" << ParamAttr.StrideOrArg;
11753       break;
11754     case Linear:
11755       Out << 'l';
11756       // Don't print the step value if it is not present or if it is
11757       // equal to 1.
11758       if (ParamAttr.StrideOrArg != 1)
11759         Out << ParamAttr.StrideOrArg;
11760       break;
11761     case Uniform:
11762       Out << 'u';
11763       break;
11764     case Vector:
11765       Out << 'v';
11766       break;
11767     }
11768 
11769     if (!!ParamAttr.Alignment)
11770       Out << 'a' << ParamAttr.Alignment;
11771   }
11772 
11773   return std::string(Out.str());
11774 }
11775 
11776 // Function used to add the attribute. The parameter `VLEN` is
11777 // templated to allow the use of "x" when targeting scalable functions
11778 // for SVE.
11779 template <typename T>
11780 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11781                                  char ISA, StringRef ParSeq,
11782                                  StringRef MangledName, bool OutputBecomesInput,
11783                                  llvm::Function *Fn) {
11784   SmallString<256> Buffer;
11785   llvm::raw_svector_ostream Out(Buffer);
11786   Out << Prefix << ISA << LMask << VLEN;
11787   if (OutputBecomesInput)
11788     Out << "v";
11789   Out << ParSeq << "_" << MangledName;
11790   Fn->addFnAttr(Out.str());
11791 }
11792 
11793 // Helper function to generate the Advanced SIMD names depending on
11794 // the value of the NDS when simdlen is not present.
11795 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11796                                       StringRef Prefix, char ISA,
11797                                       StringRef ParSeq, StringRef MangledName,
11798                                       bool OutputBecomesInput,
11799                                       llvm::Function *Fn) {
11800   switch (NDS) {
11801   case 8:
11802     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11803                          OutputBecomesInput, Fn);
11804     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11805                          OutputBecomesInput, Fn);
11806     break;
11807   case 16:
11808     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11809                          OutputBecomesInput, Fn);
11810     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11811                          OutputBecomesInput, Fn);
11812     break;
11813   case 32:
11814     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11815                          OutputBecomesInput, Fn);
11816     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11817                          OutputBecomesInput, Fn);
11818     break;
11819   case 64:
11820   case 128:
11821     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11822                          OutputBecomesInput, Fn);
11823     break;
11824   default:
11825     llvm_unreachable("Scalar type is too wide.");
11826   }
11827 }
11828 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// \param UserVLEN the value of the 'simdlen' clause, or 0 when absent.
/// \param ISA 'n' for Advanced SIMD, 's' for SVE.
/// \param SLoc location used for any diagnostics about invalid simdlen values.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits: total vector bits (UserVLEN * widest lane) must be a
  // multiple of 128 and no larger than 2048.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        // No branch-state clause: emit both unmasked and masked variants.
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`. The VLENs themselves are derived from the NDS.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11937 
11938 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11939                                               llvm::Function *Fn) {
11940   ASTContext &C = CGM.getContext();
11941   FD = FD->getMostRecentDecl();
11942   while (FD) {
11943     // Map params to their positions in function decl.
11944     llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11945     if (isa<CXXMethodDecl>(FD))
11946       ParamPositions.try_emplace(FD, 0);
11947     unsigned ParamPos = ParamPositions.size();
11948     for (const ParmVarDecl *P : FD->parameters()) {
11949       ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11950       ++ParamPos;
11951     }
11952     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11953       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11954       // Mark uniform parameters.
11955       for (const Expr *E : Attr->uniforms()) {
11956         E = E->IgnoreParenImpCasts();
11957         unsigned Pos;
11958         if (isa<CXXThisExpr>(E)) {
11959           Pos = ParamPositions[FD];
11960         } else {
11961           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11962                                 ->getCanonicalDecl();
11963           auto It = ParamPositions.find(PVD);
11964           assert(It != ParamPositions.end() && "Function parameter not found");
11965           Pos = It->second;
11966         }
11967         ParamAttrs[Pos].Kind = Uniform;
11968       }
11969       // Get alignment info.
11970       auto *NI = Attr->alignments_begin();
11971       for (const Expr *E : Attr->aligneds()) {
11972         E = E->IgnoreParenImpCasts();
11973         unsigned Pos;
11974         QualType ParmTy;
11975         if (isa<CXXThisExpr>(E)) {
11976           Pos = ParamPositions[FD];
11977           ParmTy = E->getType();
11978         } else {
11979           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11980                                 ->getCanonicalDecl();
11981           auto It = ParamPositions.find(PVD);
11982           assert(It != ParamPositions.end() && "Function parameter not found");
11983           Pos = It->second;
11984           ParmTy = PVD->getType();
11985         }
11986         ParamAttrs[Pos].Alignment =
11987             (*NI)
11988                 ? (*NI)->EvaluateKnownConstInt(C)
11989                 : llvm::APSInt::getUnsigned(
11990                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11991                           .getQuantity());
11992         ++NI;
11993       }
11994       // Mark linear parameters.
11995       auto *SI = Attr->steps_begin();
11996       for (const Expr *E : Attr->linears()) {
11997         E = E->IgnoreParenImpCasts();
11998         unsigned Pos;
11999         // Rescaling factor needed to compute the linear parameter
12000         // value in the mangled name.
12001         unsigned PtrRescalingFactor = 1;
12002         if (isa<CXXThisExpr>(E)) {
12003           Pos = ParamPositions[FD];
12004         } else {
12005           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12006                                 ->getCanonicalDecl();
12007           auto It = ParamPositions.find(PVD);
12008           assert(It != ParamPositions.end() && "Function parameter not found");
12009           Pos = It->second;
12010           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12011             PtrRescalingFactor = CGM.getContext()
12012                                      .getTypeSizeInChars(P->getPointeeType())
12013                                      .getQuantity();
12014         }
12015         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12016         ParamAttr.Kind = Linear;
12017         // Assuming a stride of 1, for `linear` without modifiers.
12018         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12019         if (*SI) {
12020           Expr::EvalResult Result;
12021           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12022             if (const auto *DRE =
12023                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12024               if (const auto *StridePVD =
12025                       dyn_cast<ParmVarDecl>(DRE->getDecl())) {
12026                 ParamAttr.Kind = LinearWithVarStride;
12027                 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
12028                 assert(It != ParamPositions.end() &&
12029                        "Function parameter not found");
12030                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
12031               }
12032             }
12033           } else {
12034             ParamAttr.StrideOrArg = Result.Val.getInt();
12035           }
12036         }
12037         // If we are using a linear clause on a pointer, we need to
12038         // rescale the value of linear_step with the byte size of the
12039         // pointee type.
12040         if (Linear == ParamAttr.Kind)
12041           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12042         ++SI;
12043       }
12044       llvm::APSInt VLENVal;
12045       SourceLocation ExprLoc;
12046       const Expr *VLENExpr = Attr->getSimdlen();
12047       if (VLENExpr) {
12048         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12049         ExprLoc = VLENExpr->getExprLoc();
12050       }
12051       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12052       if (CGM.getTriple().isX86()) {
12053         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12054       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12055         unsigned VLEN = VLENVal.getExtValue();
12056         StringRef MangledName = Fn->getName();
12057         if (CGM.getTarget().hasFeature("sve"))
12058           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12059                                          MangledName, 's', 128, Fn, ExprLoc);
12060         if (CGM.getTarget().hasFeature("neon"))
12061           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12062                                          MangledName, 'n', 128, Fn, ExprLoc);
12063       }
12064     }
12065     FD = FD->getPreviousDecl();
12066   }
12067 }
12068 
12069 namespace {
12070 /// Cleanup action for doacross support.
12071 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12072 public:
12073   static const int DoacrossFinArgs = 2;
12074 
12075 private:
12076   llvm::FunctionCallee RTLFn;
12077   llvm::Value *Args[DoacrossFinArgs];
12078 
12079 public:
12080   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12081                     ArrayRef<llvm::Value *> CallArgs)
12082       : RTLFn(RTLFn) {
12083     assert(CallArgs.size() == DoacrossFinArgs);
12084     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12085   }
12086   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12087     if (!CGF.HaveInsertPoint())
12088       return;
12089     CGF.EmitRuntimeCall(RTLFn, Args);
12090   }
12091 };
12092 } // namespace
12093 
/// Emits initialization of doacross (ordered-with-depend) loop support:
/// builds a local array of per-dimension bounds records and calls
/// __kmpc_doacross_init, then registers a cleanup that calls
/// __kmpc_doacross_fini on region exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    // kmp_dim was already built on an earlier call; reuse the cached type.
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // Local array "kmp_dim dims[<num loops>]"; zero-initialized so the 'lo'
  // field (and anything not explicitly stored below) is 0.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // Iteration counts may have any integer type; convert to kmp_int64.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Push a cleanup that emits __kmpc_doacross_fini(loc, gtid) on both the
  // normal and exceptional exit paths of the region.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
12164 
/// Emits a doacross post/wait runtime call for an 'ordered depend' construct:
/// materializes the per-loop dependence vector as a local kmp_int64 array and
/// calls __kmpc_doacross_post (for depend(source)) or __kmpc_doacross_wait
/// (for depend(sink)).
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  // Local array "kmp_int64 cnt[<num loops>]" holding the dependence vector.
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    // Counter expressions may have any integer type; convert to kmp_int64.
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  // Both runtime entries share the signature (ident_t *, gtid, kmp_int64 *).
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
12197 
12198 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12199                                llvm::FunctionCallee Callee,
12200                                ArrayRef<llvm::Value *> Args) const {
12201   assert(Loc.isValid() && "Outlined function call location must be valid.");
12202   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12203 
12204   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12205     if (Fn->doesNotThrow()) {
12206       CGF.EmitNounwindRuntimeCall(Fn, Args);
12207       return;
12208     }
12209   }
12210   CGF.EmitRuntimeCall(Callee, Args);
12211 }
12212 
/// Emits a call to an OpenMP outlined function. The base implementation
/// simply forwards to emitCall; targets may override to adjust arguments.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12218 
12219 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12220   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12221     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12222       HasEmittedDeclareTargetRegion = true;
12223 }
12224 
/// Returns the address of a captured parameter. The base implementation
/// ignores \p TargetParam and returns the native parameter's local address;
/// device runtimes may override to translate between the two.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12230 
12231 /// Return allocator value from expression, or return a null allocator (default
12232 /// when no allocator specified).
12233 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12234                                     const Expr *Allocator) {
12235   llvm::Value *AllocVal;
12236   if (Allocator) {
12237     AllocVal = CGF.EmitScalarExpr(Allocator);
12238     // According to the standard, the original allocator type is a enum
12239     // (integer). Convert to pointer type, if required.
12240     AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12241                                         CGF.getContext().VoidPtrTy,
12242                                         Allocator->getExprLoc());
12243   } else {
12244     // If no allocator specified, it defaults to the null allocator.
12245     AllocVal = llvm::Constant::getNullValue(
12246         CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
12247   }
12248   return AllocVal;
12249 }
12250 
/// Returns the address to use for a local variable, handling two special
/// cases: variables privatized inside an untied task (looked up in the
/// untied-locals stack) and variables with an 'omp allocate' attribute
/// (allocated via __kmpc_alloc/__kmpc_aligned_alloc with a matching
/// __kmpc_free cleanup). Returns an invalid Address when neither applies.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // If the current function is an untied task body, look up the (address,
  // real address) pair recorded for this variable, if any.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA-like type: size is only known at runtime.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Constant-sized type: compute the aligned size at compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    // An explicit 'align' modifier selects __kmpc_aligned_alloc below.
    llvm::Value *Alignment =
        AA->getAlignment()
            ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
                                        CGM.SizeTy, /*isSigned=*/false)
            : nullptr;
    // Argument order: gtid, [alignment,] size, allocator.
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    // Cast the runtime's void* result to a pointer to the variable's type.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For untied tasks, persist the allocated pointer in the task-local slot.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // __kmpc_free(gtid, ptr, allocator) with the same allocator that
        // was used for the allocation.
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // Prefer the untied task's "real" address when one is recorded.
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12354 
12355 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12356                                              const VarDecl *VD) const {
12357   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12358   if (It == FunctionToUntiedTaskStackMap.end())
12359     return false;
12360   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12361 }
12362 
12363 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12364     CodeGenModule &CGM, const OMPLoopDirective &S)
12365     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12366   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12367   if (!NeedToPush)
12368     return;
12369   NontemporalDeclsSet &DS =
12370       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12371   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12372     for (const Stmt *Ref : C->private_refs()) {
12373       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12374       const ValueDecl *VD;
12375       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12376         VD = DRE->getDecl();
12377       } else {
12378         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12379         assert((ME->isImplicitCXXThis() ||
12380                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12381                "Expected member of current class.");
12382         VD = ME->getMemberDecl();
12383       }
12384       DS.insert(VD);
12385     }
12386   }
12387 }
12388 
12389 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12390   if (!NeedToPush)
12391     return;
12392   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12393 }
12394 
12395 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12396     CodeGenFunction &CGF,
12397     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12398                           std::pair<Address, Address>> &LocalVars)
12399     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12400   if (!NeedToPush)
12401     return;
12402   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12403       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12404   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12405 }
12406 
12407 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12408   if (!NeedToPush)
12409     return;
12410   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12411 }
12412 
12413 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12414   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12415 
12416   return llvm::any_of(
12417       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12418       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12419 }
12420 
12421 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12422     const OMPExecutableDirective &S,
12423     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12424     const {
12425   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12426   // Vars in target/task regions must be excluded completely.
12427   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12428       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12429     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12430     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12431     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12432     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12433       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12434         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12435     }
12436   }
12437   // Exclude vars in private clauses.
12438   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12439     for (const Expr *Ref : C->varlists()) {
12440       if (!Ref->getType()->isScalarType())
12441         continue;
12442       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12443       if (!DRE)
12444         continue;
12445       NeedToCheckForLPCs.insert(DRE->getDecl());
12446     }
12447   }
12448   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12449     for (const Expr *Ref : C->varlists()) {
12450       if (!Ref->getType()->isScalarType())
12451         continue;
12452       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12453       if (!DRE)
12454         continue;
12455       NeedToCheckForLPCs.insert(DRE->getDecl());
12456     }
12457   }
12458   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12459     for (const Expr *Ref : C->varlists()) {
12460       if (!Ref->getType()->isScalarType())
12461         continue;
12462       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12463       if (!DRE)
12464         continue;
12465       NeedToCheckForLPCs.insert(DRE->getDecl());
12466     }
12467   }
12468   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12469     for (const Expr *Ref : C->varlists()) {
12470       if (!Ref->getType()->isScalarType())
12471         continue;
12472       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12473       if (!DRE)
12474         continue;
12475       NeedToCheckForLPCs.insert(DRE->getDecl());
12476     }
12477   }
12478   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12479     for (const Expr *Ref : C->varlists()) {
12480       if (!Ref->getType()->isScalarType())
12481         continue;
12482       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12483       if (!DRE)
12484         continue;
12485       NeedToCheckForLPCs.insert(DRE->getDecl());
12486     }
12487   }
12488   for (const Decl *VD : NeedToCheckForLPCs) {
12489     for (const LastprivateConditionalData &Data :
12490          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12491       if (Data.DeclToUniqueName.count(VD) > 0) {
12492         if (!Data.Disabled)
12493           NeedToAddForLPCsAsDisabled.insert(VD);
12494         break;
12495       }
12496     }
12497   }
12498 }
12499 
/// Pushes a lastprivate conditional region onto the stack when the directive
/// has at least one lastprivate clause with the 'conditional' modifier
/// (OpenMP 5.0+); otherwise does nothing.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      // Push only for OpenMP >= 5.0 and only when some lastprivate clause
      // carries the 'conditional' modifier.
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Map each conditional-lastprivate variable to a unique global name
    // used for the helper variables emitted for it.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  // Remember the loop iteration variable and the emitting function for use
  // when updates are emitted later.
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12531 
/// Constructor used to *disable* lastprivate conditional analysis inside an
/// inner region: pushes a 'Disabled' entry listing the declarations for
/// which updates must not be emitted (see tryToDisableInnerAnalysis).
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  // Lastprivate conditional is an OpenMP 5.0 feature.
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // Unique names are irrelevant for disabled entries; store empty strings.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
12550 
/// Factory for the disabling form of the RAII object: the (CGF, S)
/// constructor analyzes S and pushes a 'Disabled' entry when needed.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12556 
12557 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12558   if (CGM.getLangOpts().OpenMP < 50)
12559     return;
12560   if (Action == ActionToDo::DisableLastprivateConditional) {
12561     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12562            "Expected list of disabled private vars.");
12563     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12564   }
12565   if (Action == ActionToDo::PushAsLastprivateConditional) {
12566     assert(
12567         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12568         "Expected list of lastprivate conditional vars.");
12569     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12570   }
12571 }
12572 
/// Creates (or reuses) the per-function helper record for a lastprivate
/// conditional variable: a struct holding the private copy plus a 'Fired'
/// char flag. Resets the flag to 0 and returns the address of the private
/// copy field.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use of VD in this function: build the helper record
    // { <VD's type> value; char Fired; } and a stack temporary for it.
    // NOTE(review): the record name spelling "lasprivate" (sic) is
    // preexisting; it is an internal implicit-record name.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Reuse the cached record type, fields and base lvalue.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0; the flag is set elsewhere when the variable is updated.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12607 
12608 namespace {
12609 /// Checks if the lastprivate conditional variable is referenced in LHS.
12610 class LastprivateConditionalRefChecker final
12611     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12612   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12613   const Expr *FoundE = nullptr;
12614   const Decl *FoundD = nullptr;
12615   StringRef UniqueDeclName;
12616   LValue IVLVal;
12617   llvm::Function *FoundFn = nullptr;
12618   SourceLocation Loc;
12619 
12620 public:
12621   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12622     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12623          llvm::reverse(LPM)) {
12624       auto It = D.DeclToUniqueName.find(E->getDecl());
12625       if (It == D.DeclToUniqueName.end())
12626         continue;
12627       if (D.Disabled)
12628         return false;
12629       FoundE = E;
12630       FoundD = E->getDecl()->getCanonicalDecl();
12631       UniqueDeclName = It->second;
12632       IVLVal = D.IVLVal;
12633       FoundFn = D.Fn;
12634       break;
12635     }
12636     return FoundE == E;
12637   }
12638   bool VisitMemberExpr(const MemberExpr *E) {
12639     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12640       return false;
12641     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12642          llvm::reverse(LPM)) {
12643       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12644       if (It == D.DeclToUniqueName.end())
12645         continue;
12646       if (D.Disabled)
12647         return false;
12648       FoundE = E;
12649       FoundD = E->getMemberDecl()->getCanonicalDecl();
12650       UniqueDeclName = It->second;
12651       IVLVal = D.IVLVal;
12652       FoundFn = D.Fn;
12653       break;
12654     }
12655     return FoundE == E;
12656   }
12657   bool VisitStmt(const Stmt *S) {
12658     for (const Stmt *Child : S->children()) {
12659       if (!Child)
12660         continue;
12661       if (const auto *E = dyn_cast<Expr>(Child))
12662         if (!E->isGLValue())
12663           continue;
12664       if (Visit(Child))
12665         return true;
12666     }
12667     return false;
12668   }
12669   explicit LastprivateConditionalRefChecker(
12670       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12671       : LPM(LPM) {}
12672   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12673   getFoundData() const {
12674     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12675   }
12676 };
12677 } // namespace
12678 
/// Emits the conditional-lastprivate bookkeeping update for one variable:
///   #pragma omp critical(<UniqueDeclName>)
///   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
/// where last_iv/last_a are lazily-created internal global variables that
/// remember the highest iteration seen so far and the value the variable had
/// in that iteration.
/// \param IVLVal lvalue of the loop iteration variable (must be of integer
///        type — asserted below).
/// \param UniqueDeclName unique global name of the variable; also used as the
///        name of the critical section guarding the update.
/// \param LVal lvalue of the private copy of the variable in this context.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  // Created on first use; name is "<UniqueDeclName>.iv".
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    // Pick a signed or unsigned comparison based on the IV's source type.
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    // Scalars and complex values are copied directly; aggregates are
    // rejected earlier, hence unreachable here.
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12764 
12765 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12766                                                          const Expr *LHS) {
12767   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12768     return;
12769   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12770   if (!Checker.Visit(LHS))
12771     return;
12772   const Expr *FoundE;
12773   const Decl *FoundD;
12774   StringRef UniqueDeclName;
12775   LValue IVLVal;
12776   llvm::Function *FoundFn;
12777   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12778       Checker.getFoundData();
12779   if (FoundFn != CGF.CurFn) {
12780     // Special codegen for inner parallel regions.
12781     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12782     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12783     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12784            "Lastprivate conditional is not found in outer region.");
12785     QualType StructTy = std::get<0>(It->getSecond());
12786     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12787     LValue PrivLVal = CGF.EmitLValue(FoundE);
12788     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12789         PrivLVal.getAddress(CGF),
12790         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12791         CGF.ConvertTypeForMem(StructTy));
12792     LValue BaseLVal =
12793         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12794     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12795     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12796                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12797                         FiredLVal, llvm::AtomicOrdering::Unordered,
12798                         /*IsVolatile=*/true, /*isInit=*/false);
12799     return;
12800   }
12801 
12802   // Private address of the lastprivate conditional in the current context.
12803   // priv_a
12804   LValue LVal = CGF.EmitLValue(FoundE);
12805   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12806                                    FoundE->getExprLoc());
12807 }
12808 
12809 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12810     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12811     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12812   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12813     return;
12814   auto Range = llvm::reverse(LastprivateConditionalStack);
12815   auto It = llvm::find_if(
12816       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12817   if (It == Range.end() || It->Fn != CGF.CurFn)
12818     return;
12819   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12820   assert(LPCI != LastprivateConditionalToTypes.end() &&
12821          "Lastprivates must be registered already.");
12822   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12823   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12824   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12825   for (const auto &Pair : It->DeclToUniqueName) {
12826     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12827     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12828       continue;
12829     auto I = LPCI->getSecond().find(Pair.first);
12830     assert(I != LPCI->getSecond().end() &&
12831            "Lastprivate must be rehistered already.");
12832     // bool Cmp = priv_a.Fired != 0;
12833     LValue BaseLVal = std::get<3>(I->getSecond());
12834     LValue FiredLVal =
12835         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12836     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12837     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12838     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12839     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12840     // if (Cmp) {
12841     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12842     CGF.EmitBlock(ThenBB);
12843     Address Addr = CGF.GetAddrOfLocalVar(VD);
12844     LValue LVal;
12845     if (VD->getType()->isReferenceType())
12846       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12847                                            AlignmentSource::Decl);
12848     else
12849       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12850                                 AlignmentSource::Decl);
12851     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12852                                      D.getBeginLoc());
12853     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12854     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12855     // }
12856   }
12857 }
12858 
12859 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12860     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12861     SourceLocation Loc) {
12862   if (CGF.getLangOpts().OpenMP < 50)
12863     return;
12864   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12865   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12866          "Unknown lastprivate conditional variable.");
12867   StringRef UniqueName = It->second;
12868   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12869   // The variable was not updated in the region - exit.
12870   if (!GV)
12871     return;
12872   LValue LPLVal = CGF.MakeAddrLValue(
12873       Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
12874       PrivLVal.getType().getNonReferenceType());
12875   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12876   CGF.EmitStoreOfScalar(Res, PrivLVal);
12877 }
12878 
/// SIMD-only mode: parallel outlined functions are never emitted; aborts.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: teams outlined functions are never emitted; aborts.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: task outlined functions are never emitted; aborts.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12898 
/// SIMD-only mode: parallel calls are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: critical regions are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: master regions are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: masked regions are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: taskyield calls are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: taskgroup regions are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: single regions are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: ordered regions are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: barrier calls are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12961 
/// SIMD-only mode: dynamic loop dispatch is never emitted; aborts.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: static loop init is never emitted; aborts.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: distribute static init is never emitted; aborts.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: ordered-iteration bookkeeping is never emitted; aborts.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: static loop finish is never emitted; aborts.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: dynamic-loop "next chunk" queries are never emitted; aborts.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13001 
/// SIMD-only mode: num_threads clauses are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: proc_bind clauses are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: threadprivate addressing is never emitted; aborts.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: threadprivate definitions are never emitted; aborts.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: artificial threadprivates are never emitted; aborts.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: flush operations are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: task calls are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: taskloop calls are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13054 
/// SIMD-only mode supports only the "simple" (serial) reduction form
/// (asserted below); delegates the actual emission to the base runtime.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
13063 
/// SIMD-only mode: task reduction init is never emitted; aborts.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: task reduction fini is never emitted; aborts.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: task reduction fixups are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: task reduction item lookup is never emitted; aborts.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: taskwait calls are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: cancellation points are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: cancel calls are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13107 
/// SIMD-only mode: target outlined functions are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: target calls are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: target functions are never emitted; aborts.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: target global variables are never emitted; aborts.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode never claims a global for target emission: always reports
/// "not handled" so regular codegen proceeds.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
13136 
/// SIMD-only mode: teams calls are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: num_teams clauses are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: target data regions are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: standalone target data ops are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: doacross init is never emitted; aborts.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: doacross ordered deps are never emitted; aborts.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: parameter translation is never needed; aborts.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// SIMD-only mode: parameter address mapping is never needed; aborts.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13187