1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/Attr.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/OpenMPClause.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/BitmaskEnum.h"
26 #include "clang/Basic/FileManager.h"
27 #include "clang/Basic/OpenMPKinds.h"
28 #include "clang/Basic/SourceManager.h"
29 #include "clang/CodeGen/ConstantInitBuilder.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SetOperations.h"
32 #include "llvm/ADT/SmallBitVector.h"
33 #include "llvm/ADT/StringExtras.h"
34 #include "llvm/Bitcode/BitcodeReader.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/GlobalValue.h"
38 #include "llvm/IR/Value.h"
39 #include "llvm/Support/AtomicOrdering.h"
40 #include "llvm/Support/Format.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include <cassert>
43 #include <numeric>
44 
45 using namespace clang;
46 using namespace CodeGen;
47 using namespace llvm::omp;
48 
49 namespace {
50 /// Base class for handling code generation inside OpenMP regions.
51 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
52 public:
53   /// Kinds of OpenMP regions used in codegen.
54   enum CGOpenMPRegionKind {
55     /// Region with outlined function for standalone 'parallel'
56     /// directive.
57     ParallelOutlinedRegion,
58     /// Region with outlined function for standalone 'task' directive.
59     TaskOutlinedRegion,
60     /// Region for constructs that do not require function outlining,
61     /// like 'for', 'sections', 'atomic' etc. directives.
62     InlinedRegion,
63     /// Region with outlined function for standalone 'target' directive.
64     TargetRegion,
65   };
66 
67   CGOpenMPRegionInfo(const CapturedStmt &CS,
68                      const CGOpenMPRegionKind RegionKind,
69                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
70                      bool HasCancel)
71       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
72         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
73 
74   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
75                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
76                      bool HasCancel)
77       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
78         Kind(Kind), HasCancel(HasCancel) {}
79 
80   /// Get a variable or parameter for storing global thread id
81   /// inside OpenMP construct.
82   virtual const VarDecl *getThreadIDVariable() const = 0;
83 
84   /// Emit the captured statement body.
85   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
86 
87   /// Get an LValue for the current ThreadID variable.
88   /// \return LValue for thread id variable. This LValue always has type int32*.
89   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
90 
91   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
92 
93   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
94 
95   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
96 
97   bool hasCancel() const { return HasCancel; }
98 
99   static bool classof(const CGCapturedStmtInfo *Info) {
100     return Info->getKind() == CR_OpenMP;
101   }
102 
103   ~CGOpenMPRegionInfo() override = default;
104 
105 protected:
106   CGOpenMPRegionKind RegionKind;
107   RegionCodeGenTy CodeGen;
108   OpenMPDirectiveKind Kind;
109   bool HasCancel;
110 };
111 
112 /// API for captured statement code generation in OpenMP constructs.
113 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
114 public:
115   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
116                              const RegionCodeGenTy &CodeGen,
117                              OpenMPDirectiveKind Kind, bool HasCancel,
118                              StringRef HelperName)
119       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
120                            HasCancel),
121         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
122     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
123   }
124 
125   /// Get a variable or parameter for storing global thread id
126   /// inside OpenMP construct.
127   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
128 
129   /// Get the name of the capture helper.
130   StringRef getHelperName() const override { return HelperName; }
131 
132   static bool classof(const CGCapturedStmtInfo *Info) {
133     return CGOpenMPRegionInfo::classof(Info) &&
134            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
135                ParallelOutlinedRegion;
136   }
137 
138 private:
139   /// A variable or parameter storing global thread id for OpenMP
140   /// constructs.
141   const VarDecl *ThreadIDVar;
142   StringRef HelperName;
143 };
144 
145 /// API for captured statement code generation in OpenMP constructs.
146 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
147 public:
148   class UntiedTaskActionTy final : public PrePostActionTy {
149     bool Untied;
150     const VarDecl *PartIDVar;
151     const RegionCodeGenTy UntiedCodeGen;
152     llvm::SwitchInst *UntiedSwitch = nullptr;
153 
154   public:
155     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
156                        const RegionCodeGenTy &UntiedCodeGen)
157         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
158     void Enter(CodeGenFunction &CGF) override {
159       if (Untied) {
160         // Emit task switching point.
161         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
162             CGF.GetAddrOfLocalVar(PartIDVar),
163             PartIDVar->getType()->castAs<PointerType>());
164         llvm::Value *Res =
165             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
166         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
167         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
168         CGF.EmitBlock(DoneBB);
169         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
170         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
171         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
172                               CGF.Builder.GetInsertBlock());
173         emitUntiedSwitch(CGF);
174       }
175     }
176     void emitUntiedSwitch(CodeGenFunction &CGF) const {
177       if (Untied) {
178         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
179             CGF.GetAddrOfLocalVar(PartIDVar),
180             PartIDVar->getType()->castAs<PointerType>());
181         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
182                               PartIdLVal);
183         UntiedCodeGen(CGF);
184         CodeGenFunction::JumpDest CurPoint =
185             CGF.getJumpDestInCurrentScope(".untied.next.");
186         CGF.EmitBranch(CGF.ReturnBlock.getBlock());
187         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
188         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
189                               CGF.Builder.GetInsertBlock());
190         CGF.EmitBranchThroughCleanup(CurPoint);
191         CGF.EmitBlock(CurPoint.getBlock());
192       }
193     }
194     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
195   };
196   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
197                                  const VarDecl *ThreadIDVar,
198                                  const RegionCodeGenTy &CodeGen,
199                                  OpenMPDirectiveKind Kind, bool HasCancel,
200                                  const UntiedTaskActionTy &Action)
201       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
202         ThreadIDVar(ThreadIDVar), Action(Action) {
203     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
204   }
205 
206   /// Get a variable or parameter for storing global thread id
207   /// inside OpenMP construct.
208   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
209 
210   /// Get an LValue for the current ThreadID variable.
211   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
212 
213   /// Get the name of the capture helper.
214   StringRef getHelperName() const override { return ".omp_outlined."; }
215 
216   void emitUntiedSwitch(CodeGenFunction &CGF) override {
217     Action.emitUntiedSwitch(CGF);
218   }
219 
220   static bool classof(const CGCapturedStmtInfo *Info) {
221     return CGOpenMPRegionInfo::classof(Info) &&
222            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
223                TaskOutlinedRegion;
224   }
225 
226 private:
227   /// A variable or parameter storing global thread id for OpenMP
228   /// constructs.
229   const VarDecl *ThreadIDVar;
230   /// Action for emitting code for untied tasks.
231   const UntiedTaskActionTy &Action;
232 };
233 
234 /// API for inlined captured statement code generation in OpenMP
235 /// constructs.
236 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
237 public:
238   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
239                             const RegionCodeGenTy &CodeGen,
240                             OpenMPDirectiveKind Kind, bool HasCancel)
241       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
242         OldCSI(OldCSI),
243         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
244 
245   // Retrieve the value of the context parameter.
246   llvm::Value *getContextValue() const override {
247     if (OuterRegionInfo)
248       return OuterRegionInfo->getContextValue();
249     llvm_unreachable("No context value for inlined OpenMP region");
250   }
251 
252   void setContextValue(llvm::Value *V) override {
253     if (OuterRegionInfo) {
254       OuterRegionInfo->setContextValue(V);
255       return;
256     }
257     llvm_unreachable("No context value for inlined OpenMP region");
258   }
259 
260   /// Lookup the captured field decl for a variable.
261   const FieldDecl *lookup(const VarDecl *VD) const override {
262     if (OuterRegionInfo)
263       return OuterRegionInfo->lookup(VD);
264     // If there is no outer outlined region,no need to lookup in a list of
265     // captured variables, we can use the original one.
266     return nullptr;
267   }
268 
269   FieldDecl *getThisFieldDecl() const override {
270     if (OuterRegionInfo)
271       return OuterRegionInfo->getThisFieldDecl();
272     return nullptr;
273   }
274 
275   /// Get a variable or parameter for storing global thread id
276   /// inside OpenMP construct.
277   const VarDecl *getThreadIDVariable() const override {
278     if (OuterRegionInfo)
279       return OuterRegionInfo->getThreadIDVariable();
280     return nullptr;
281   }
282 
283   /// Get an LValue for the current ThreadID variable.
284   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
285     if (OuterRegionInfo)
286       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
287     llvm_unreachable("No LValue for inlined OpenMP construct");
288   }
289 
290   /// Get the name of the capture helper.
291   StringRef getHelperName() const override {
292     if (auto *OuterRegionInfo = getOldCSI())
293       return OuterRegionInfo->getHelperName();
294     llvm_unreachable("No helper name for inlined OpenMP construct");
295   }
296 
297   void emitUntiedSwitch(CodeGenFunction &CGF) override {
298     if (OuterRegionInfo)
299       OuterRegionInfo->emitUntiedSwitch(CGF);
300   }
301 
302   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
303 
304   static bool classof(const CGCapturedStmtInfo *Info) {
305     return CGOpenMPRegionInfo::classof(Info) &&
306            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
307   }
308 
309   ~CGOpenMPInlinedRegionInfo() override = default;
310 
311 private:
312   /// CodeGen info about outer OpenMP region.
313   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
314   CGOpenMPRegionInfo *OuterRegionInfo;
315 };
316 
317 /// API for captured statement code generation in OpenMP target
318 /// constructs. For this captures, implicit parameters are used instead of the
319 /// captured fields. The name of the target region has to be unique in a given
320 /// application so it is provided by the client, because only the client has
321 /// the information to generate that.
322 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
323 public:
324   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
325                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
326       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
327                            /*HasCancel=*/false),
328         HelperName(HelperName) {}
329 
330   /// This is unused for target regions because each starts executing
331   /// with a single thread.
332   const VarDecl *getThreadIDVariable() const override { return nullptr; }
333 
334   /// Get the name of the capture helper.
335   StringRef getHelperName() const override { return HelperName; }
336 
337   static bool classof(const CGCapturedStmtInfo *Info) {
338     return CGOpenMPRegionInfo::classof(Info) &&
339            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
340   }
341 
342 private:
343   StringRef HelperName;
344 };
345 
346 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
347   llvm_unreachable("No codegen for expressions");
348 }
349 /// API for generation of expressions captured in a innermost OpenMP
350 /// region.
351 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
352 public:
353   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
354       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
355                                   OMPD_unknown,
356                                   /*HasCancel=*/false),
357         PrivScope(CGF) {
358     // Make sure the globals captured in the provided statement are local by
359     // using the privatization logic. We assume the same variable is not
360     // captured more than once.
361     for (const auto &C : CS.captures()) {
362       if (!C.capturesVariable() && !C.capturesVariableByCopy())
363         continue;
364 
365       const VarDecl *VD = C.getCapturedVar();
366       if (VD->isLocalVarDeclOrParm())
367         continue;
368 
369       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
370                       /*RefersToEnclosingVariableOrCapture=*/false,
371                       VD->getType().getNonReferenceType(), VK_LValue,
372                       C.getLocation());
373       PrivScope.addPrivate(
374           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
375     }
376     (void)PrivScope.Privatize();
377   }
378 
379   /// Lookup the captured field decl for a variable.
380   const FieldDecl *lookup(const VarDecl *VD) const override {
381     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
382       return FD;
383     return nullptr;
384   }
385 
386   /// Emit the captured statement body.
387   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
388     llvm_unreachable("No body for expressions");
389   }
390 
391   /// Get a variable or parameter for storing global thread id
392   /// inside OpenMP construct.
393   const VarDecl *getThreadIDVariable() const override {
394     llvm_unreachable("No thread id for expressions");
395   }
396 
397   /// Get the name of the capture helper.
398   StringRef getHelperName() const override {
399     llvm_unreachable("No helper name for expressions");
400   }
401 
402   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
403 
404 private:
405   /// Private scope to capture global variables.
406   CodeGenFunction::OMPPrivateScope PrivScope;
407 };
408 
409 /// RAII for emitting code of OpenMP constructs.
410 class InlinedOpenMPRegionRAII {
411   CodeGenFunction &CGF;
412   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
413   FieldDecl *LambdaThisCaptureField = nullptr;
414   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
415   bool NoInheritance = false;
416 
417 public:
418   /// Constructs region for combined constructs.
419   /// \param CodeGen Code generation sequence for combined directives. Includes
420   /// a list of functions used for code generation of implicitly inlined
421   /// regions.
422   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
423                           OpenMPDirectiveKind Kind, bool HasCancel,
424                           bool NoInheritance = true)
425       : CGF(CGF), NoInheritance(NoInheritance) {
426     // Start emission for the construct.
427     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
428         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
429     if (NoInheritance) {
430       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
431       LambdaThisCaptureField = CGF.LambdaThisCaptureField;
432       CGF.LambdaThisCaptureField = nullptr;
433       BlockInfo = CGF.BlockInfo;
434       CGF.BlockInfo = nullptr;
435     }
436   }
437 
438   ~InlinedOpenMPRegionRAII() {
439     // Restore original CapturedStmtInfo only if we're done with code emission.
440     auto *OldCSI =
441         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
442     delete CGF.CapturedStmtInfo;
443     CGF.CapturedStmtInfo = OldCSI;
444     if (NoInheritance) {
445       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
446       CGF.LambdaThisCaptureField = LambdaThisCaptureField;
447       CGF.BlockInfo = BlockInfo;
448     }
449   }
450 };
451 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeration elements are named and described in accordance with the
/// code from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// The numeric values are kept as literals so they can be compared directly
/// against that header.
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive (0x40 | 0x80).
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive (0x40 | 0x100).
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  // Must name the enumerator with the largest value above.
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
480 
481 namespace {
482 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which 'requires' clauses have been used.
/// Each clause occupies its own bit so that several can be combined.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// Flag undefined (no bit set).
  OMP_REQ_UNDEFINED               = 0x000,
  /// No requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  // Must name the enumerator with the largest value above.
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};
499 
/// Device IDs with a reserved meaning for offloading.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
505 } // anonymous namespace
506 
507 /// Describes ident structure that describes a source location.
508 /// All descriptions are taken from
509 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
510 /// Original structure:
511 /// typedef struct ident {
512 ///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
513 ///                                  see above  */
514 ///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
515 ///                                  KMP_IDENT_KMPC identifies this union
516 ///                                  member  */
517 ///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
518 ///                                  see above */
519 ///#if USE_ITT_BUILD
520 ///                            /*  but currently used for storing
521 ///                                region-specific ITT */
522 ///                            /*  contextual information. */
523 ///#endif /* USE_ITT_BUILD */
524 ///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
525 ///                                 C++  */
526 ///    char const *psource;    /**< String describing the source location.
527 ///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
529 ///                            the function and a pair of line numbers that
530 ///                            delimit the construct.
531 ///                             */
532 /// } ident_t;
/// Positions of the fields inside the ident_t structure, in declaration
/// order. The indices are spelled out so the correspondence with the
/// structure layout is explicit.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1 = 0,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// Not really used in Fortran any more
  IdentField_Reserved_2 = 2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3 = 3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
547 
/// Schedule kinds for 'omp for' loops. The enumerators and their values are
/// taken from the enum sched_type in kmp.h.
enum OpenMPSchedType {
  // --- Default (unordered) schedules ---------------------------------------
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Schedule used by default: plain static.
  OMP_sch_default = OMP_sch_static,

  // --- 'ordered' schedules -------------------------------------------------
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  // --- dist_schedule types -------------------------------------------------
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // --- OpenMP 4.5 monotonic / nonmonotonic schedule modifiers --------------
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
579 
580 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
581 /// region.
582 class CleanupTy final : public EHScopeStack::Cleanup {
583   PrePostActionTy *Action;
584 
585 public:
586   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
587   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
588     if (!CGF.HaveInsertPoint())
589       return;
590     Action->Exit(CGF);
591   }
592 };
593 
594 } // anonymous namespace
595 
596 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
597   CodeGenFunction::RunCleanupsScope Scope(CGF);
598   if (PrePostAction) {
599     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
600     Callback(CodeGen, CGF, *PrePostAction);
601   } else {
602     PrePostActionTy Action;
603     Callback(CodeGen, CGF, Action);
604   }
605 }
606 
607 /// Check if the combiner is a call to UDR combiner and if it is so return the
608 /// UDR decl used for reduction.
609 static const OMPDeclareReductionDecl *
610 getReductionInit(const Expr *ReductionOp) {
611   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
612     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
613       if (const auto *DRE =
614               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
615         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
616           return DRD;
617   return nullptr;
618 }
619 
620 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
621                                              const OMPDeclareReductionDecl *DRD,
622                                              const Expr *InitOp,
623                                              Address Private, Address Original,
624                                              QualType Ty) {
625   if (DRD->getInitializer()) {
626     std::pair<llvm::Function *, llvm::Function *> Reduction =
627         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
628     const auto *CE = cast<CallExpr>(InitOp);
629     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
630     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
631     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
632     const auto *LHSDRE =
633         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
634     const auto *RHSDRE =
635         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
636     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
637     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
638                             [=]() { return Private; });
639     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
640                             [=]() { return Original; });
641     (void)PrivateScope.Privatize();
642     RValue Func = RValue::get(Reduction.second);
643     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
644     CGF.EmitIgnoredExpr(InitOp);
645   } else {
646     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
647     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
648     auto *GV = new llvm::GlobalVariable(
649         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
650         llvm::GlobalValue::PrivateLinkage, Init, Name);
651     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
652     RValue InitRVal;
653     switch (CGF.getEvaluationKind(Ty)) {
654     case TEK_Scalar:
655       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
656       break;
657     case TEK_Complex:
658       InitRVal =
659           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
660       break;
661     case TEK_Aggregate: {
662       OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
663       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
664       CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
665                            /*IsInitializer=*/false);
666       return;
667     }
668     }
669     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
670     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
671     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
672                          /*IsInitializer=*/false);
673   }
674 }
675 
676 /// Emit initialization of arrays of complex types.
677 /// \param DestAddr Address of the array.
678 /// \param Type Type of array.
679 /// \param Init Initial expression of array.
680 /// \param SrcAddr Address of the original array.
681 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
682                                  QualType Type, bool EmitDeclareReductionInit,
683                                  const Expr *Init,
684                                  const OMPDeclareReductionDecl *DRD,
685                                  Address SrcAddr = Address::invalid()) {
686   // Perform element-by-element initialization.
687   QualType ElementTy;
688 
689   // Drill down to the base element type on both arrays.
690   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
691   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
692   if (DRD)
693     SrcAddr =
694         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
695 
696   llvm::Value *SrcBegin = nullptr;
697   if (DRD)
698     SrcBegin = SrcAddr.getPointer();
699   llvm::Value *DestBegin = DestAddr.getPointer();
700   // Cast from pointer to array type to pointer to single element.
701   llvm::Value *DestEnd =
702       CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
703   // The basic structure here is a while-do loop.
704   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
705   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
706   llvm::Value *IsEmpty =
707       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
708   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
709 
710   // Enter the loop body, making that address the current address.
711   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
712   CGF.EmitBlock(BodyBB);
713 
714   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
715 
716   llvm::PHINode *SrcElementPHI = nullptr;
717   Address SrcElementCurrent = Address::invalid();
718   if (DRD) {
719     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
720                                           "omp.arraycpy.srcElementPast");
721     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
722     SrcElementCurrent =
723         Address(SrcElementPHI, SrcAddr.getElementType(),
724                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
725   }
726   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
727       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
728   DestElementPHI->addIncoming(DestBegin, EntryBB);
729   Address DestElementCurrent =
730       Address(DestElementPHI, DestAddr.getElementType(),
731               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
732 
733   // Emit copy.
734   {
735     CodeGenFunction::RunCleanupsScope InitScope(CGF);
736     if (EmitDeclareReductionInit) {
737       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
738                                        SrcElementCurrent, ElementTy);
739     } else
740       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
741                            /*IsInitializer=*/false);
742   }
743 
744   if (DRD) {
745     // Shift the address forward by one element.
746     llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
747         SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
748         "omp.arraycpy.dest.element");
749     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
750   }
751 
752   // Shift the address forward by one element.
753   llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
754       DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
755       "omp.arraycpy.dest.element");
756   // Check whether we've reached the end.
757   llvm::Value *Done =
758       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
759   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
760   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
761 
762   // Done.
763   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
764 }
765 
766 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
767   return CGF.EmitOMPSharedLValue(E);
768 }
769 
770 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
771                                             const Expr *E) {
772   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
773     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
774   return LValue();
775 }
776 
777 void ReductionCodeGen::emitAggregateInitialization(
778     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
779     const OMPDeclareReductionDecl *DRD) {
780   // Emit VarDecl with copy init for arrays.
781   // Get the address of the original variable captured in current
782   // captured region.
783   const auto *PrivateVD =
784       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
785   bool EmitDeclareReductionInit =
786       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
787   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
788                        EmitDeclareReductionInit,
789                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
790                                                 : PrivateVD->getInit(),
791                        DRD, SharedAddr);
792 }
793 
794 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
795                                    ArrayRef<const Expr *> Origs,
796                                    ArrayRef<const Expr *> Privates,
797                                    ArrayRef<const Expr *> ReductionOps) {
798   ClausesData.reserve(Shareds.size());
799   SharedAddresses.reserve(Shareds.size());
800   Sizes.reserve(Shareds.size());
801   BaseDecls.reserve(Shareds.size());
802   const auto *IOrig = Origs.begin();
803   const auto *IPriv = Privates.begin();
804   const auto *IRed = ReductionOps.begin();
805   for (const Expr *Ref : Shareds) {
806     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
807     std::advance(IOrig, 1);
808     std::advance(IPriv, 1);
809     std::advance(IRed, 1);
810   }
811 }
812 
813 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
814   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
815          "Number of generated lvalues must be exactly N.");
816   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
817   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
818   SharedAddresses.emplace_back(First, Second);
819   if (ClausesData[N].Shared == ClausesData[N].Ref) {
820     OrigAddresses.emplace_back(First, Second);
821   } else {
822     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
823     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
824     OrigAddresses.emplace_back(First, Second);
825   }
826 }
827 
828 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
829   const auto *PrivateVD =
830       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
831   QualType PrivateType = PrivateVD->getType();
832   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
833   if (!PrivateType->isVariablyModifiedType()) {
834     Sizes.emplace_back(
835         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
836         nullptr);
837     return;
838   }
839   llvm::Value *Size;
840   llvm::Value *SizeInChars;
841   auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
842   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
843   if (AsArraySection) {
844     Size = CGF.Builder.CreatePtrDiff(ElemType,
845                                      OrigAddresses[N].second.getPointer(CGF),
846                                      OrigAddresses[N].first.getPointer(CGF));
847     Size = CGF.Builder.CreateNUWAdd(
848         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
849     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
850   } else {
851     SizeInChars =
852         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
853     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
854   }
855   Sizes.emplace_back(SizeInChars, Size);
856   CodeGenFunction::OpaqueValueMapping OpaqueMap(
857       CGF,
858       cast<OpaqueValueExpr>(
859           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
860       RValue::get(Size));
861   CGF.EmitVariablyModifiedType(PrivateType);
862 }
863 
864 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
865                                          llvm::Value *Size) {
866   const auto *PrivateVD =
867       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
868   QualType PrivateType = PrivateVD->getType();
869   if (!PrivateType->isVariablyModifiedType()) {
870     assert(!Size && !Sizes[N].second &&
871            "Size should be nullptr for non-variably modified reduction "
872            "items.");
873     return;
874   }
875   CodeGenFunction::OpaqueValueMapping OpaqueMap(
876       CGF,
877       cast<OpaqueValueExpr>(
878           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
879       RValue::get(Size));
880   CGF.EmitVariablyModifiedType(PrivateType);
881 }
882 
883 void ReductionCodeGen::emitInitialization(
884     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
885     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
886   assert(SharedAddresses.size() > N && "No variable was generated");
887   const auto *PrivateVD =
888       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
889   const OMPDeclareReductionDecl *DRD =
890       getReductionInit(ClausesData[N].ReductionOp);
891   QualType PrivateType = PrivateVD->getType();
892   PrivateAddr = CGF.Builder.CreateElementBitCast(
893       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
894   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
895     if (DRD && DRD->getInitializer())
896       (void)DefaultInit(CGF);
897     emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
898   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
899     (void)DefaultInit(CGF);
900     QualType SharedType = SharedAddresses[N].first.getType();
901     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
902                                      PrivateAddr, SharedAddr, SharedType);
903   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
904              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
905     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
906                          PrivateVD->getType().getQualifiers(),
907                          /*IsInitializer=*/false);
908   }
909 }
910 
911 bool ReductionCodeGen::needCleanups(unsigned N) {
912   const auto *PrivateVD =
913       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
914   QualType PrivateType = PrivateVD->getType();
915   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
916   return DTorKind != QualType::DK_none;
917 }
918 
919 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
920                                     Address PrivateAddr) {
921   const auto *PrivateVD =
922       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
923   QualType PrivateType = PrivateVD->getType();
924   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
925   if (needCleanups(N)) {
926     PrivateAddr = CGF.Builder.CreateElementBitCast(
927         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
928     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
929   }
930 }
931 
932 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
933                           LValue BaseLV) {
934   BaseTy = BaseTy.getNonReferenceType();
935   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
936          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
937     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
938       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
939     } else {
940       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
941       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
942     }
943     BaseTy = BaseTy->getPointeeType();
944   }
945   return CGF.MakeAddrLValue(
946       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
947                                        CGF.ConvertTypeForMem(ElTy)),
948       BaseLV.getType(), BaseLV.getBaseInfo(),
949       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
950 }
951 
952 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
953                           llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
954                           llvm::Value *Addr) {
955   Address Tmp = Address::invalid();
956   Address TopTmp = Address::invalid();
957   Address MostTopTmp = Address::invalid();
958   BaseTy = BaseTy.getNonReferenceType();
959   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
960          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
961     Tmp = CGF.CreateMemTemp(BaseTy);
962     if (TopTmp.isValid())
963       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
964     else
965       MostTopTmp = Tmp;
966     TopTmp = Tmp;
967     BaseTy = BaseTy->getPointeeType();
968   }
969   llvm::Type *Ty = BaseLVType;
970   if (Tmp.isValid())
971     Ty = Tmp.getElementType();
972   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
973   if (Tmp.isValid()) {
974     CGF.Builder.CreateStore(Addr, Tmp);
975     return MostTopTmp;
976   }
977   return Address::deprecated(Addr, BaseLVAlignment);
978 }
979 
980 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
981   const VarDecl *OrigVD = nullptr;
982   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
983     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
984     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
985       Base = TempOASE->getBase()->IgnoreParenImpCasts();
986     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
987       Base = TempASE->getBase()->IgnoreParenImpCasts();
988     DE = cast<DeclRefExpr>(Base);
989     OrigVD = cast<VarDecl>(DE->getDecl());
990   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
991     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
992     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
993       Base = TempASE->getBase()->IgnoreParenImpCasts();
994     DE = cast<DeclRefExpr>(Base);
995     OrigVD = cast<VarDecl>(DE->getDecl());
996   }
997   return OrigVD;
998 }
999 
1000 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1001                                                Address PrivateAddr) {
1002   const DeclRefExpr *DE;
1003   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1004     BaseDecls.emplace_back(OrigVD);
1005     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1006     LValue BaseLValue =
1007         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1008                     OriginalBaseLValue);
1009     Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
1010     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1011         SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
1012         SharedAddr.getPointer());
1013     llvm::Value *PrivatePointer =
1014         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1015             PrivateAddr.getPointer(), SharedAddr.getType());
1016     llvm::Value *Ptr = CGF.Builder.CreateGEP(
1017         SharedAddr.getElementType(), PrivatePointer, Adjustment);
1018     return castToBase(CGF, OrigVD->getType(),
1019                       SharedAddresses[N].first.getType(),
1020                       OriginalBaseLValue.getAddress(CGF).getType(),
1021                       OriginalBaseLValue.getAlignment(), Ptr);
1022   }
1023   BaseDecls.emplace_back(
1024       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1025   return PrivateAddr;
1026 }
1027 
1028 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1029   const OMPDeclareReductionDecl *DRD =
1030       getReductionInit(ClausesData[N].ReductionOp);
1031   return DRD && DRD->getInitializer();
1032 }
1033 
1034 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1035   return CGF.EmitLoadOfPointerLValue(
1036       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1037       getThreadIDVariable()->getType()->castAs<PointerType>());
1038 }
1039 
1040 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1041   if (!CGF.HaveInsertPoint())
1042     return;
1043   // 1.2.2 OpenMP Language Terminology
1044   // Structured block - An executable statement with a single entry at the
1045   // top and a single exit at the bottom.
1046   // The point of exit cannot be a branch out of the structured block.
1047   // longjmp() and throw() must not violate the entry/exit criteria.
1048   CGF.EHStack.pushTerminate();
1049   if (S)
1050     CGF.incrementProfileCounter(S);
1051   CodeGen(CGF);
1052   CGF.EHStack.popTerminate();
1053 }
1054 
1055 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1056     CodeGenFunction &CGF) {
1057   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1058                             getThreadIDVariable()->getType(),
1059                             AlignmentSource::Decl);
1060 }
1061 
1062 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1063                                        QualType FieldTy) {
1064   auto *Field = FieldDecl::Create(
1065       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1066       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1067       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1068   Field->setAccess(AS_public);
1069   DC->addDecl(Field);
1070   return Field;
1071 }
1072 
1073 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1074                                  StringRef Separator)
1075     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1076       OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
1077   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1078 
1079   // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1080   OMPBuilder.initialize();
1081   loadOffloadInfoMetadata();
1082 }
1083 
1084 void CGOpenMPRuntime::clear() {
1085   InternalVars.clear();
1086   // Clean non-target variable declarations possibly used only in debug info.
1087   for (const auto &Data : EmittedNonTargetVariables) {
1088     if (!Data.getValue().pointsToAliveValue())
1089       continue;
1090     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1091     if (!GV)
1092       continue;
1093     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1094       continue;
1095     GV->eraseFromParent();
1096   }
1097 }
1098 
1099 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1100   SmallString<128> Buffer;
1101   llvm::raw_svector_ostream OS(Buffer);
1102   StringRef Sep = FirstSeparator;
1103   for (StringRef Part : Parts) {
1104     OS << Sep << Part;
1105     Sep = Separator;
1106   }
1107   return std::string(OS.str());
1108 }
1109 
1110 static llvm::Function *
1111 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1112                           const Expr *CombinerInitializer, const VarDecl *In,
1113                           const VarDecl *Out, bool IsCombiner) {
1114   // void .omp_combiner.(Ty *in, Ty *out);
1115   ASTContext &C = CGM.getContext();
1116   QualType PtrTy = C.getPointerType(Ty).withRestrict();
1117   FunctionArgList Args;
1118   ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1119                                /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1120   ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1121                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1122   Args.push_back(&OmpOutParm);
1123   Args.push_back(&OmpInParm);
1124   const CGFunctionInfo &FnInfo =
1125       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1126   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1127   std::string Name = CGM.getOpenMPRuntime().getName(
1128       {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1129   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1130                                     Name, &CGM.getModule());
1131   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1132   if (CGM.getLangOpts().Optimize) {
1133     Fn->removeFnAttr(llvm::Attribute::NoInline);
1134     Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1135     Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1136   }
1137   CodeGenFunction CGF(CGM);
1138   // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1139   // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1140   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1141                     Out->getLocation());
1142   CodeGenFunction::OMPPrivateScope Scope(CGF);
1143   Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1144   Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
1145     return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1146         .getAddress(CGF);
1147   });
1148   Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1149   Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
1150     return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1151         .getAddress(CGF);
1152   });
1153   (void)Scope.Privatize();
1154   if (!IsCombiner && Out->hasInit() &&
1155       !CGF.isTrivialInitializer(Out->getInit())) {
1156     CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1157                          Out->getType().getQualifiers(),
1158                          /*IsInitializer=*/true);
1159   }
1160   if (CombinerInitializer)
1161     CGF.EmitIgnoredExpr(CombinerInitializer);
1162   Scope.ForceCleanup();
1163   CGF.FinishFunction();
1164   return Fn;
1165 }
1166 
1167 void CGOpenMPRuntime::emitUserDefinedReduction(
1168     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1169   if (UDRMap.count(D) > 0)
1170     return;
1171   llvm::Function *Combiner = emitCombinerOrInitializer(
1172       CGM, D->getType(), D->getCombiner(),
1173       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1174       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1175       /*IsCombiner=*/true);
1176   llvm::Function *Initializer = nullptr;
1177   if (const Expr *Init = D->getInitializer()) {
1178     Initializer = emitCombinerOrInitializer(
1179         CGM, D->getType(),
1180         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1181                                                                      : nullptr,
1182         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1183         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1184         /*IsCombiner=*/false);
1185   }
1186   UDRMap.try_emplace(D, Combiner, Initializer);
1187   if (CGF) {
1188     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1189     Decls.second.push_back(D);
1190   }
1191 }
1192 
1193 std::pair<llvm::Function *, llvm::Function *>
1194 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1195   auto I = UDRMap.find(D);
1196   if (I != UDRMap.end())
1197     return I->second;
1198   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1199   return UDRMap.lookup(D);
1200 }
1201 
1202 namespace {
1203 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1204 // Builder if one is present.
1205 struct PushAndPopStackRAII {
1206   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1207                       bool HasCancel, llvm::omp::Directive Kind)
1208       : OMPBuilder(OMPBuilder) {
1209     if (!OMPBuilder)
1210       return;
1211 
1212     // The following callback is the crucial part of clangs cleanup process.
1213     //
1214     // NOTE:
1215     // Once the OpenMPIRBuilder is used to create parallel regions (and
1216     // similar), the cancellation destination (Dest below) is determined via
1217     // IP. That means if we have variables to finalize we split the block at IP,
1218     // use the new block (=BB) as destination to build a JumpDest (via
1219     // getJumpDestInCurrentScope(BB)) which then is fed to
1220     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1221     // to push & pop an FinalizationInfo object.
1222     // The FiniCB will still be needed but at the point where the
1223     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1224     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1225       assert(IP.getBlock()->end() == IP.getPoint() &&
1226              "Clang CG should cause non-terminated block!");
1227       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1228       CGF.Builder.restoreIP(IP);
1229       CodeGenFunction::JumpDest Dest =
1230           CGF.getOMPCancelDestination(OMPD_parallel);
1231       CGF.EmitBranchThroughCleanup(Dest);
1232     };
1233 
1234     // TODO: Remove this once we emit parallel regions through the
1235     //       OpenMPIRBuilder as it can do this setup internally.
1236     llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1237     OMPBuilder->pushFinalizationCB(std::move(FI));
1238   }
1239   ~PushAndPopStackRAII() {
1240     if (OMPBuilder)
1241       OMPBuilder->popFinalizationCB();
1242   }
1243   llvm::OpenMPIRBuilder *OMPBuilder;
1244 };
1245 } // namespace
1246 
1247 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1248     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1249     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1250     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1251   assert(ThreadIDVar->getType()->isPointerType() &&
1252          "thread id variable must be of type kmp_int32 *");
1253   CodeGenFunction CGF(CGM, true);
1254   bool HasCancel = false;
1255   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1256     HasCancel = OPD->hasCancel();
1257   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1258     HasCancel = OPD->hasCancel();
1259   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1260     HasCancel = OPSD->hasCancel();
1261   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1262     HasCancel = OPFD->hasCancel();
1263   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1264     HasCancel = OPFD->hasCancel();
1265   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1266     HasCancel = OPFD->hasCancel();
1267   else if (const auto *OPFD =
1268                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1269     HasCancel = OPFD->hasCancel();
1270   else if (const auto *OPFD =
1271                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1272     HasCancel = OPFD->hasCancel();
1273 
1274   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1275   //       parallel region to make cancellation barriers work properly.
1276   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1277   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1278   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1279                                     HasCancel, OutlinedHelperName);
1280   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1281   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1282 }
1283 
1284 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1285     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1286     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1287   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1288   return emitParallelOrTeamsOutlinedFunction(
1289       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1290 }
1291 
1292 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1293     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1294     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1295   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1296   return emitParallelOrTeamsOutlinedFunction(
1297       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1298 }
1299 
1300 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1301     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1302     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1303     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1304     bool Tied, unsigned &NumberOfParts) {
1305   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1306                                               PrePostActionTy &) {
1307     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1308     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1309     llvm::Value *TaskArgs[] = {
1310         UpLoc, ThreadID,
1311         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1312                                     TaskTVar->getType()->castAs<PointerType>())
1313             .getPointer(CGF)};
1314     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1315                             CGM.getModule(), OMPRTL___kmpc_omp_task),
1316                         TaskArgs);
1317   };
1318   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1319                                                             UntiedCodeGen);
1320   CodeGen.setAction(Action);
1321   assert(!ThreadIDVar->getType()->isPointerType() &&
1322          "thread id variable must be of type kmp_int32 for tasks");
1323   const OpenMPDirectiveKind Region =
1324       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1325                                                       : OMPD_task;
1326   const CapturedStmt *CS = D.getCapturedStmt(Region);
1327   bool HasCancel = false;
1328   if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1329     HasCancel = TD->hasCancel();
1330   else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1331     HasCancel = TD->hasCancel();
1332   else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1333     HasCancel = TD->hasCancel();
1334   else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1335     HasCancel = TD->hasCancel();
1336 
1337   CodeGenFunction CGF(CGM, true);
1338   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1339                                         InnermostKind, HasCancel, Action);
1340   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1341   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1342   if (!Tied)
1343     NumberOfParts = Action.getNumberOfParts();
1344   return Res;
1345 }
1346 
1347 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1348                              const RecordDecl *RD, const CGRecordLayout &RL,
1349                              ArrayRef<llvm::Constant *> Data) {
1350   llvm::StructType *StructTy = RL.getLLVMType();
1351   unsigned PrevIdx = 0;
1352   ConstantInitBuilder CIBuilder(CGM);
1353   const auto *DI = Data.begin();
1354   for (const FieldDecl *FD : RD->fields()) {
1355     unsigned Idx = RL.getLLVMFieldNo(FD);
1356     // Fill the alignment.
1357     for (unsigned I = PrevIdx; I < Idx; ++I)
1358       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1359     PrevIdx = Idx + 1;
1360     Fields.add(*DI);
1361     ++DI;
1362   }
1363 }
1364 
1365 template <class... As>
1366 static llvm::GlobalVariable *
1367 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1368                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1369                    As &&... Args) {
1370   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1371   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1372   ConstantInitBuilder CIBuilder(CGM);
1373   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1374   buildStructValue(Fields, CGM, RD, RL, Data);
1375   return Fields.finishAndCreateGlobal(
1376       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1377       std::forward<As>(Args)...);
1378 }
1379 
1380 template <typename T>
1381 static void
1382 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1383                                          ArrayRef<llvm::Constant *> Data,
1384                                          T &Parent) {
1385   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1386   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1387   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1388   buildStructValue(Fields, CGM, RD, RL, Data);
1389   Fields.finishAndAddTo(Parent);
1390 }
1391 
1392 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1393                                              bool AtCurrentPoint) {
1394   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1395   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1396 
1397   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1398   if (AtCurrentPoint) {
1399     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1400         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1401   } else {
1402     Elem.second.ServiceInsertPt =
1403         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1404     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1405   }
1406 }
1407 
1408 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1409   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1410   if (Elem.second.ServiceInsertPt) {
1411     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1412     Elem.second.ServiceInsertPt = nullptr;
1413     Ptr->eraseFromParent();
1414   }
1415 }
1416 
1417 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1418                                                   SourceLocation Loc,
1419                                                   SmallString<128> &Buffer) {
1420   llvm::raw_svector_ostream OS(Buffer);
1421   // Build debug location
1422   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1423   OS << ";" << PLoc.getFilename() << ";";
1424   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1425     OS << FD->getQualifiedNameAsString();
1426   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1427   return OS.str();
1428 }
1429 
1430 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1431                                                  SourceLocation Loc,
1432                                                  unsigned Flags) {
1433   uint32_t SrcLocStrSize;
1434   llvm::Constant *SrcLocStr;
1435   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1436       Loc.isInvalid()) {
1437     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1438   } else {
1439     std::string FunctionName;
1440     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1441       FunctionName = FD->getQualifiedNameAsString();
1442     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1443     const char *FileName = PLoc.getFilename();
1444     unsigned Line = PLoc.getLine();
1445     unsigned Column = PLoc.getColumn();
1446     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1447                                                 Column, SrcLocStrSize);
1448   }
1449   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1450   return OMPBuilder.getOrCreateIdent(
1451       SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1452 }
1453 
/// Return a value holding the OpenMP global thread id for the function that
/// \p CGF is currently emitting.
///
/// Three strategies are tried in order:
///  1. With the OpenMPIRBuilder enabled, delegate entirely to the builder.
///  2. Reuse a thread id already cached for this function, or load it from
///     the region's thread-id variable when that is possible here.
///  3. Otherwise emit a call to __kmpc_global_thread_num at the function's
///     service insert point and cache the result for the whole function.
///
/// \param Loc source location used for the ident argument / the load.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the thread-id variable when no landing pad is required or
      // C++ exceptions are off, when we are emitting into the entry block,
      // or when the variable's pointer is not an instruction or is defined
      // in the entry block or in the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insert point; the guard restores the
  // builder's previous insertion point when this function returns.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1522 
1523 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1524   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1525   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1526     clearLocThreadIdInsertPt(CGF);
1527     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1528   }
1529   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1530     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1531       UDRMap.erase(D);
1532     FunctionUDRMap.erase(CGF.CurFn);
1533   }
1534   auto I = FunctionUDMMap.find(CGF.CurFn);
1535   if (I != FunctionUDMMap.end()) {
1536     for(const auto *D : I->second)
1537       UDMMap.erase(D);
1538     FunctionUDMMap.erase(I);
1539   }
1540   LastprivateConditionalToTypes.erase(CGF.CurFn);
1541   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1542 }
1543 
/// Return the pointer-to-ident type (OMPBuilder.IdentPtr) used for the 'loc'
/// parameter of the runtime function signatures built in this file.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1547 
1548 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1549   if (!Kmpc_MicroTy) {
1550     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1551     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1552                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1553     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1554   }
1555   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1556 }
1557 
1558 llvm::FunctionCallee
1559 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1560                                              bool IsGPUDistribute) {
1561   assert((IVSize == 32 || IVSize == 64) &&
1562          "IV size is not compatible with the omp runtime");
1563   StringRef Name;
1564   if (IsGPUDistribute)
1565     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1566                                     : "__kmpc_distribute_static_init_4u")
1567                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1568                                     : "__kmpc_distribute_static_init_8u");
1569   else
1570     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1571                                     : "__kmpc_for_static_init_4u")
1572                         : (IVSigned ? "__kmpc_for_static_init_8"
1573                                     : "__kmpc_for_static_init_8u");
1574 
1575   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1576   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1577   llvm::Type *TypeParams[] = {
1578     getIdentTyPointerTy(),                     // loc
1579     CGM.Int32Ty,                               // tid
1580     CGM.Int32Ty,                               // schedtype
1581     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1582     PtrTy,                                     // p_lower
1583     PtrTy,                                     // p_upper
1584     PtrTy,                                     // p_stride
1585     ITy,                                       // incr
1586     ITy                                        // chunk
1587   };
1588   auto *FnTy =
1589       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1590   return CGM.CreateRuntimeFunction(FnTy, Name);
1591 }
1592 
1593 llvm::FunctionCallee
1594 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1595   assert((IVSize == 32 || IVSize == 64) &&
1596          "IV size is not compatible with the omp runtime");
1597   StringRef Name =
1598       IVSize == 32
1599           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1600           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1601   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1602   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1603                                CGM.Int32Ty,           // tid
1604                                CGM.Int32Ty,           // schedtype
1605                                ITy,                   // lower
1606                                ITy,                   // upper
1607                                ITy,                   // stride
1608                                ITy                    // chunk
1609   };
1610   auto *FnTy =
1611       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1612   return CGM.CreateRuntimeFunction(FnTy, Name);
1613 }
1614 
1615 llvm::FunctionCallee
1616 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1617   assert((IVSize == 32 || IVSize == 64) &&
1618          "IV size is not compatible with the omp runtime");
1619   StringRef Name =
1620       IVSize == 32
1621           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1622           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1623   llvm::Type *TypeParams[] = {
1624       getIdentTyPointerTy(), // loc
1625       CGM.Int32Ty,           // tid
1626   };
1627   auto *FnTy =
1628       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1629   return CGM.CreateRuntimeFunction(FnTy, Name);
1630 }
1631 
1632 llvm::FunctionCallee
1633 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1634   assert((IVSize == 32 || IVSize == 64) &&
1635          "IV size is not compatible with the omp runtime");
1636   StringRef Name =
1637       IVSize == 32
1638           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1639           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1640   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1641   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1642   llvm::Type *TypeParams[] = {
1643     getIdentTyPointerTy(),                     // loc
1644     CGM.Int32Ty,                               // tid
1645     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1646     PtrTy,                                     // p_lower
1647     PtrTy,                                     // p_upper
1648     PtrTy                                      // p_stride
1649   };
1650   auto *FnTy =
1651       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1652   return CGM.CreateRuntimeFunction(FnTy, Name);
1653 }
1654 
1655 /// Obtain information that uniquely identifies a target entry. This
1656 /// consists of the file and device IDs as well as line number associated with
1657 /// the relevant entry source location.
1658 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1659                                      unsigned &DeviceID, unsigned &FileID,
1660                                      unsigned &LineNum) {
1661   SourceManager &SM = C.getSourceManager();
1662 
1663   // The loc should be always valid and have a file ID (the user cannot use
1664   // #pragma directives in macros)
1665 
1666   assert(Loc.isValid() && "Source location is expected to be always valid.");
1667 
1668   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1669   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1670 
1671   llvm::sys::fs::UniqueID ID;
1672   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1673     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1674     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1675     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1676       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1677           << PLoc.getFilename() << EC.message();
1678   }
1679 
1680   DeviceID = ID.getDevice();
1681   FileID = ID.getFile();
1682   LineNum = PLoc.getLine();
1683 }
1684 
1685 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1686   if (CGM.getLangOpts().OpenMPSimd)
1687     return Address::invalid();
1688   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1689       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1690   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1691               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1692                HasRequiresUnifiedSharedMemory))) {
1693     SmallString<64> PtrName;
1694     {
1695       llvm::raw_svector_ostream OS(PtrName);
1696       OS << CGM.getMangledName(GlobalDecl(VD));
1697       if (!VD->isExternallyVisible()) {
1698         unsigned DeviceID, FileID, Line;
1699         getTargetEntryUniqueInfo(CGM.getContext(),
1700                                  VD->getCanonicalDecl()->getBeginLoc(),
1701                                  DeviceID, FileID, Line);
1702         OS << llvm::format("_%x", FileID);
1703       }
1704       OS << "_decl_tgt_ref_ptr";
1705     }
1706     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1707     if (!Ptr) {
1708       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1709       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1710                                         PtrName);
1711 
1712       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1713       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1714 
1715       if (!CGM.getLangOpts().OpenMPIsDevice)
1716         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1717       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1718     }
1719     return Address::deprecated(Ptr, CGM.getContext().getDeclAlign(VD));
1720   }
1721   return Address::invalid();
1722 }
1723 
1724 llvm::Constant *
1725 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1726   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1727          !CGM.getContext().getTargetInfo().isTLSSupported());
1728   // Lookup the entry, lazily creating it if necessary.
1729   std::string Suffix = getName({"cache", ""});
1730   return getOrCreateInternalVariable(
1731       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1732 }
1733 
1734 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1735                                                 const VarDecl *VD,
1736                                                 Address VDAddr,
1737                                                 SourceLocation Loc) {
1738   if (CGM.getLangOpts().OpenMPUseTLS &&
1739       CGM.getContext().getTargetInfo().isTLSSupported())
1740     return VDAddr;
1741 
1742   llvm::Type *VarTy = VDAddr.getElementType();
1743   llvm::Value *Args[] = {
1744       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1745       CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1746       CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1747       getOrCreateThreadPrivateCache(VD)};
1748   return Address::deprecated(
1749       CGF.EmitRuntimeCall(
1750           OMPBuilder.getOrCreateRuntimeFunction(
1751               CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1752           Args),
1753       VDAddr.getAlignment());
1754 }
1755 
1756 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1757     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1758     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1759   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1760   // library.
1761   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1762   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1763                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1764                       OMPLoc);
1765   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1766   // to register constructor/destructor for variable.
1767   llvm::Value *Args[] = {
1768       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1769       Ctor, CopyCtor, Dtor};
1770   CGF.EmitRuntimeCall(
1771       OMPBuilder.getOrCreateRuntimeFunction(
1772           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1773       Args);
1774 }
1775 
/// Emit the constructor/destructor helper functions for the threadprivate
/// variable \p VD and register them with the OpenMP runtime.
///
/// Nothing is emitted when TLS is requested and supported by the target, when
/// \p VD has no definition, or when the variable (by mangled name) has
/// already been processed.
///
/// \param VDAddr address of the original variable.
/// \param PerformInit in C++ mode, whether to generate the helper that
/// re-emits the variable's initializer.
/// \param CGF if non-null, the registration calls are emitted directly into
/// this function and nullptr is returned; if null, they are wrapped in a new
/// "__omp_threadprivate_init_" function, which is returned.
/// \returns the newly created init function, or nullptr in every other case.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // The set insertion succeeds only the first time a given mangled name is
  // seen, so each variable is handled once.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The helper takes a single void* parameter: the destination storage.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      // View the incoming pointer as the variable's memory type and run the
      // initializer into it.
      Address Arg = Address::deprecated(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The helper returns the destination pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The helper takes a single void* parameter: the object to destroy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      // Use an empty debug location while the function is being set up.
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address::deprecated(ArgVal, VDAddr.getAlignment()),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Pass a typed null pointer for whichever helper was not generated.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function was supplied: wrap the registration calls in a
      // dedicated nullary init function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration calls straight into the caller's
    // function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1895 
/// Emit and register the offload entries for the constructor and destructor
/// of the declare target variable \p VD whose global is \p Addr.
///
/// When compiling for the device, real "_ctor" / "_dtor" functions operating
/// on \p Addr are generated and marked as used. When compiling for the host,
/// private constant i8 placeholder globals with the same names are created
/// instead. Either way the entry is registered with the offload entries
/// manager.
///
/// \param PerformInit in C++ mode, whether to produce the constructor entry.
/// \returns false when there are no offload target triples and this is not a
/// device compilation; otherwise whether this is a device compilation.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Nothing to emit for non-declare-target variables, 'link' variables, and
  // 'to' variables under 'requires unified_shared_memory'.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Process each variable (by mangled name) only once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the declare target variable VD at Addr.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      // Use an empty debug location while the function is being set up, then
      // an artificial one for its body.
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(
          Init, Address::deprecated(Addr, CGM.getContext().getDeclAlign(VD)),
          Init->getType().getQualifiers(),
          /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: create a private constant i8 placeholder carrying the
      // constructor entry's name; it serves as both address and ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the declare target
      // variable VD at Addr.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(
          Address::deprecated(Addr, CGM.getContext().getDeclAlign(VD)), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: create a private constant i8 placeholder carrying the
      // destructor entry's name; it serves as both address and ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2011 
2012 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2013                                                           QualType VarType,
2014                                                           StringRef Name) {
2015   std::string Suffix = getName({"artificial", ""});
2016   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2017   llvm::GlobalVariable *GAddr =
2018       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2019   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
2020       CGM.getTarget().isTLSSupported()) {
2021     GAddr->setThreadLocal(/*Val=*/true);
2022     return Address(GAddr, GAddr->getValueType(),
2023                    CGM.getContext().getTypeAlignInChars(VarType));
2024   }
2025   std::string CacheSuffix = getName({"cache", ""});
2026   llvm::Value *Args[] = {
2027       emitUpdateLocation(CGF, SourceLocation()),
2028       getThreadID(CGF, SourceLocation()),
2029       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2030       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2031                                 /*isSigned=*/false),
2032       getOrCreateInternalVariable(
2033           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2034   return Address(
2035       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2036           CGF.EmitRuntimeCall(
2037               OMPBuilder.getOrCreateRuntimeFunction(
2038                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2039               Args),
2040           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2041       VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
2042 }
2043 
2044 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2045                                    const RegionCodeGenTy &ThenGen,
2046                                    const RegionCodeGenTy &ElseGen) {
2047   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2048 
2049   // If the condition constant folds and can be elided, try to avoid emitting
2050   // the condition and the dead arm of the if/else.
2051   bool CondConstant;
2052   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2053     if (CondConstant)
2054       ThenGen(CGF);
2055     else
2056       ElseGen(CGF);
2057     return;
2058   }
2059 
2060   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2061   // emit the conditional branch.
2062   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2063   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2064   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2065   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2066 
2067   // Emit the 'then' code.
2068   CGF.EmitBlock(ThenBlock);
2069   ThenGen(CGF);
2070   CGF.EmitBranch(ContBlock);
2071   // Emit the 'else' code if present.
2072   // There is no need to emit line number for unconditional branch.
2073   (void)ApplyDebugLocation::CreateEmpty(CGF);
2074   CGF.EmitBlock(ElseBlock);
2075   ElseGen(CGF);
2076   // There is no need to emit line number for unconditional branch.
2077   (void)ApplyDebugLocation::CreateEmpty(CGF);
2078   CGF.EmitBranch(ContBlock);
2079   // Emit the continuation block for code after the if.
2080   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2081 }
2082 
2083 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2084                                        llvm::Function *OutlinedFn,
2085                                        ArrayRef<llvm::Value *> CapturedVars,
2086                                        const Expr *IfCond,
2087                                        llvm::Value *NumThreads) {
2088   if (!CGF.HaveInsertPoint())
2089     return;
2090   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2091   auto &M = CGM.getModule();
2092   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2093                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2094     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2095     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2096     llvm::Value *Args[] = {
2097         RTLoc,
2098         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2099         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2100     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2101     RealArgs.append(std::begin(Args), std::end(Args));
2102     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2103 
2104     llvm::FunctionCallee RTLFn =
2105         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2106     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2107   };
2108   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2109                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2110     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2111     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2112     // Build calls:
2113     // __kmpc_serialized_parallel(&Loc, GTid);
2114     llvm::Value *Args[] = {RTLoc, ThreadID};
2115     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2116                             M, OMPRTL___kmpc_serialized_parallel),
2117                         Args);
2118 
2119     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2120     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2121     Address ZeroAddrBound =
2122         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2123                                          /*Name=*/".bound.zero.addr");
2124     CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2125     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2126     // ThreadId for serialized parallels is 0.
2127     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2128     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2129     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2130 
2131     // Ensure we do not inline the function. This is trivially true for the ones
2132     // passed to __kmpc_fork_call but the ones called in serialized regions
2133     // could be inlined. This is not a perfect but it is closer to the invariant
2134     // we want, namely, every data environment starts with a new function.
2135     // TODO: We should pass the if condition to the runtime function and do the
2136     //       handling there. Much cleaner code.
2137     OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2138     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2139     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2140 
2141     // __kmpc_end_serialized_parallel(&Loc, GTid);
2142     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2143     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2144                             M, OMPRTL___kmpc_end_serialized_parallel),
2145                         EndArgs);
2146   };
2147   if (IfCond) {
2148     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2149   } else {
2150     RegionCodeGenTy ThenRCG(ThenGen);
2151     ThenRCG(CGF);
2152   }
2153 }
2154 
2155 // If we're inside an (outlined) parallel region, use the region info's
2156 // thread-ID variable (it is passed in a first argument of the outlined function
2157 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2158 // regular serial code region, get thread ID by calling kmp_int32
2159 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2160 // return the address of that temp.
2161 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2162                                              SourceLocation Loc) {
2163   if (auto *OMPRegionInfo =
2164           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2165     if (OMPRegionInfo->getThreadIDVariable())
2166       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2167 
2168   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2169   QualType Int32Ty =
2170       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2171   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2172   CGF.EmitStoreOfScalar(ThreadID,
2173                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2174 
2175   return ThreadIDTemp;
2176 }
2177 
2178 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2179     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2180   SmallString<256> Buffer;
2181   llvm::raw_svector_ostream Out(Buffer);
2182   Out << Name;
2183   StringRef RuntimeName = Out.str();
2184   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2185   if (Elem.second) {
2186     assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2187            "OMP internal variable has different type than requested");
2188     return &*Elem.second;
2189   }
2190 
2191   return Elem.second = new llvm::GlobalVariable(
2192              CGM.getModule(), Ty, /*IsConstant*/ false,
2193              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2194              Elem.first(), /*InsertBefore=*/nullptr,
2195              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2196 }
2197 
2198 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2199   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2200   std::string Name = getName({Prefix, "var"});
2201   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2202 }
2203 
2204 namespace {
2205 /// Common pre(post)-action for different OpenMP constructs.
2206 class CommonActionTy final : public PrePostActionTy {
2207   llvm::FunctionCallee EnterCallee;
2208   ArrayRef<llvm::Value *> EnterArgs;
2209   llvm::FunctionCallee ExitCallee;
2210   ArrayRef<llvm::Value *> ExitArgs;
2211   bool Conditional;
2212   llvm::BasicBlock *ContBlock = nullptr;
2213 
2214 public:
2215   CommonActionTy(llvm::FunctionCallee EnterCallee,
2216                  ArrayRef<llvm::Value *> EnterArgs,
2217                  llvm::FunctionCallee ExitCallee,
2218                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2219       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2220         ExitArgs(ExitArgs), Conditional(Conditional) {}
2221   void Enter(CodeGenFunction &CGF) override {
2222     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2223     if (Conditional) {
2224       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2225       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2226       ContBlock = CGF.createBasicBlock("omp_if.end");
2227       // Generate the branch (If-stmt)
2228       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2229       CGF.EmitBlock(ThenBlock);
2230     }
2231   }
2232   void Done(CodeGenFunction &CGF) {
2233     // Emit the rest of blocks/branches
2234     CGF.EmitBranch(ContBlock);
2235     CGF.EmitBlock(ContBlock, true);
2236   }
2237   void Exit(CodeGenFunction &CGF) override {
2238     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2239   }
2240 };
2241 } // anonymous namespace
2242 
2243 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2244                                          StringRef CriticalName,
2245                                          const RegionCodeGenTy &CriticalOpGen,
2246                                          SourceLocation Loc, const Expr *Hint) {
2247   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2248   // CriticalOpGen();
2249   // __kmpc_end_critical(ident_t *, gtid, Lock);
2250   // Prepare arguments and build a call to __kmpc_critical
2251   if (!CGF.HaveInsertPoint())
2252     return;
2253   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2254                          getCriticalRegionLock(CriticalName)};
2255   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2256                                                 std::end(Args));
2257   if (Hint) {
2258     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2259         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2260   }
2261   CommonActionTy Action(
2262       OMPBuilder.getOrCreateRuntimeFunction(
2263           CGM.getModule(),
2264           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2265       EnterArgs,
2266       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2267                                             OMPRTL___kmpc_end_critical),
2268       Args);
2269   CriticalOpGen.setAction(Action);
2270   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2271 }
2272 
2273 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2274                                        const RegionCodeGenTy &MasterOpGen,
2275                                        SourceLocation Loc) {
2276   if (!CGF.HaveInsertPoint())
2277     return;
2278   // if(__kmpc_master(ident_t *, gtid)) {
2279   //   MasterOpGen();
2280   //   __kmpc_end_master(ident_t *, gtid);
2281   // }
2282   // Prepare arguments and build a call to __kmpc_master
2283   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2284   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2285                             CGM.getModule(), OMPRTL___kmpc_master),
2286                         Args,
2287                         OMPBuilder.getOrCreateRuntimeFunction(
2288                             CGM.getModule(), OMPRTL___kmpc_end_master),
2289                         Args,
2290                         /*Conditional=*/true);
2291   MasterOpGen.setAction(Action);
2292   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2293   Action.Done(CGF);
2294 }
2295 
2296 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2297                                        const RegionCodeGenTy &MaskedOpGen,
2298                                        SourceLocation Loc, const Expr *Filter) {
2299   if (!CGF.HaveInsertPoint())
2300     return;
2301   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2302   //   MaskedOpGen();
2303   //   __kmpc_end_masked(iden_t *, gtid);
2304   // }
2305   // Prepare arguments and build a call to __kmpc_masked
2306   llvm::Value *FilterVal = Filter
2307                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2308                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2309   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2310                          FilterVal};
2311   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2312                             getThreadID(CGF, Loc)};
2313   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2314                             CGM.getModule(), OMPRTL___kmpc_masked),
2315                         Args,
2316                         OMPBuilder.getOrCreateRuntimeFunction(
2317                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2318                         ArgsEnd,
2319                         /*Conditional=*/true);
2320   MaskedOpGen.setAction(Action);
2321   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2322   Action.Done(CGF);
2323 }
2324 
2325 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2326                                         SourceLocation Loc) {
2327   if (!CGF.HaveInsertPoint())
2328     return;
2329   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2330     OMPBuilder.createTaskyield(CGF.Builder);
2331   } else {
2332     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2333     llvm::Value *Args[] = {
2334         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2335         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2336     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2337                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2338                         Args);
2339   }
2340 
2341   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2342     Region->emitUntiedSwitch(CGF);
2343 }
2344 
2345 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2346                                           const RegionCodeGenTy &TaskgroupOpGen,
2347                                           SourceLocation Loc) {
2348   if (!CGF.HaveInsertPoint())
2349     return;
2350   // __kmpc_taskgroup(ident_t *, gtid);
2351   // TaskgroupOpGen();
2352   // __kmpc_end_taskgroup(ident_t *, gtid);
2353   // Prepare arguments and build a call to __kmpc_taskgroup
2354   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2355   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2356                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2357                         Args,
2358                         OMPBuilder.getOrCreateRuntimeFunction(
2359                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2360                         Args);
2361   TaskgroupOpGen.setAction(Action);
2362   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2363 }
2364 
2365 /// Given an array of pointers to variables, project the address of a
2366 /// given variable.
2367 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2368                                       unsigned Index, const VarDecl *Var) {
2369   // Pull out the pointer to the variable.
2370   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2371   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2372 
2373   Address Addr = Address::deprecated(Ptr, CGF.getContext().getDeclAlign(Var));
2374   Addr = CGF.Builder.CreateElementBitCast(
2375       Addr, CGF.ConvertTypeForMem(Var->getType()));
2376   return Addr;
2377 }
2378 
2379 static llvm::Value *emitCopyprivateCopyFunction(
2380     CodeGenModule &CGM, llvm::Type *ArgsType,
2381     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2382     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2383     SourceLocation Loc) {
2384   ASTContext &C = CGM.getContext();
2385   // void copy_func(void *LHSArg, void *RHSArg);
2386   FunctionArgList Args;
2387   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2388                            ImplicitParamDecl::Other);
2389   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2390                            ImplicitParamDecl::Other);
2391   Args.push_back(&LHSArg);
2392   Args.push_back(&RHSArg);
2393   const auto &CGFI =
2394       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2395   std::string Name =
2396       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2397   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2398                                     llvm::GlobalValue::InternalLinkage, Name,
2399                                     &CGM.getModule());
2400   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2401   Fn->setDoesNotRecurse();
2402   CodeGenFunction CGF(CGM);
2403   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2404   // Dest = (void*[n])(LHSArg);
2405   // Src = (void*[n])(RHSArg);
2406   Address LHS = Address::deprecated(
2407       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2408           CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType),
2409       CGF.getPointerAlign());
2410   Address RHS = Address::deprecated(
2411       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2412           CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType),
2413       CGF.getPointerAlign());
2414   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2415   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2416   // ...
2417   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2418   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2419     const auto *DestVar =
2420         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2421     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2422 
2423     const auto *SrcVar =
2424         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2425     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2426 
2427     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2428     QualType Type = VD->getType();
2429     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2430   }
2431   CGF.FinishFunction();
2432   return Fn;
2433 }
2434 
2435 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2436                                        const RegionCodeGenTy &SingleOpGen,
2437                                        SourceLocation Loc,
2438                                        ArrayRef<const Expr *> CopyprivateVars,
2439                                        ArrayRef<const Expr *> SrcExprs,
2440                                        ArrayRef<const Expr *> DstExprs,
2441                                        ArrayRef<const Expr *> AssignmentOps) {
2442   if (!CGF.HaveInsertPoint())
2443     return;
2444   assert(CopyprivateVars.size() == SrcExprs.size() &&
2445          CopyprivateVars.size() == DstExprs.size() &&
2446          CopyprivateVars.size() == AssignmentOps.size());
2447   ASTContext &C = CGM.getContext();
2448   // int32 did_it = 0;
2449   // if(__kmpc_single(ident_t *, gtid)) {
2450   //   SingleOpGen();
2451   //   __kmpc_end_single(ident_t *, gtid);
2452   //   did_it = 1;
2453   // }
2454   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2455   // <copy_func>, did_it);
2456 
2457   Address DidIt = Address::invalid();
2458   if (!CopyprivateVars.empty()) {
2459     // int32 did_it = 0;
2460     QualType KmpInt32Ty =
2461         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2462     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2463     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2464   }
2465   // Prepare arguments and build a call to __kmpc_single
2466   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2467   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2468                             CGM.getModule(), OMPRTL___kmpc_single),
2469                         Args,
2470                         OMPBuilder.getOrCreateRuntimeFunction(
2471                             CGM.getModule(), OMPRTL___kmpc_end_single),
2472                         Args,
2473                         /*Conditional=*/true);
2474   SingleOpGen.setAction(Action);
2475   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2476   if (DidIt.isValid()) {
2477     // did_it = 1;
2478     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2479   }
2480   Action.Done(CGF);
2481   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2482   // <copy_func>, did_it);
2483   if (DidIt.isValid()) {
2484     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2485     QualType CopyprivateArrayTy = C.getConstantArrayType(
2486         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2487         /*IndexTypeQuals=*/0);
2488     // Create a list of all private variables for copyprivate.
2489     Address CopyprivateList =
2490         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2491     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2492       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2493       CGF.Builder.CreateStore(
2494           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2495               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2496               CGF.VoidPtrTy),
2497           Elem);
2498     }
2499     // Build function that copies private values from single region to all other
2500     // threads in the corresponding parallel region.
2501     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2502         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2503         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2504     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2505     Address CL =
2506       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2507                                                       CGF.VoidPtrTy);
2508     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2509     llvm::Value *Args[] = {
2510         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2511         getThreadID(CGF, Loc),        // i32 <gtid>
2512         BufSize,                      // size_t <buf_size>
2513         CL.getPointer(),              // void *<copyprivate list>
2514         CpyFn,                        // void (*) (void *, void *) <copy_func>
2515         DidItVal                      // i32 did_it
2516     };
2517     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2518                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2519                         Args);
2520   }
2521 }
2522 
2523 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2524                                         const RegionCodeGenTy &OrderedOpGen,
2525                                         SourceLocation Loc, bool IsThreads) {
2526   if (!CGF.HaveInsertPoint())
2527     return;
2528   // __kmpc_ordered(ident_t *, gtid);
2529   // OrderedOpGen();
2530   // __kmpc_end_ordered(ident_t *, gtid);
2531   // Prepare arguments and build a call to __kmpc_ordered
2532   if (IsThreads) {
2533     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2534     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2535                               CGM.getModule(), OMPRTL___kmpc_ordered),
2536                           Args,
2537                           OMPBuilder.getOrCreateRuntimeFunction(
2538                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2539                           Args);
2540     OrderedOpGen.setAction(Action);
2541     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2542     return;
2543   }
2544   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2545 }
2546 
2547 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2548   unsigned Flags;
2549   if (Kind == OMPD_for)
2550     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2551   else if (Kind == OMPD_sections)
2552     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2553   else if (Kind == OMPD_single)
2554     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2555   else if (Kind == OMPD_barrier)
2556     Flags = OMP_IDENT_BARRIER_EXPL;
2557   else
2558     Flags = OMP_IDENT_BARRIER_IMPL;
2559   return Flags;
2560 }
2561 
2562 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2563     CodeGenFunction &CGF, const OMPLoopDirective &S,
2564     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2565   // Check if the loop directive is actually a doacross loop directive. In this
2566   // case choose static, 1 schedule.
2567   if (llvm::any_of(
2568           S.getClausesOfKind<OMPOrderedClause>(),
2569           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2570     ScheduleKind = OMPC_SCHEDULE_static;
2571     // Chunk size is 1 in this case.
2572     llvm::APInt ChunkSize(32, 1);
2573     ChunkExpr = IntegerLiteral::Create(
2574         CGF.getContext(), ChunkSize,
2575         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2576         SourceLocation());
2577   }
2578 }
2579 
2580 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2581                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2582                                       bool ForceSimpleCall) {
2583   // Check if we should use the OMPBuilder
2584   auto *OMPRegionInfo =
2585       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2586   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2587     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2588         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2589     return;
2590   }
2591 
2592   if (!CGF.HaveInsertPoint())
2593     return;
2594   // Build call __kmpc_cancel_barrier(loc, thread_id);
2595   // Build call __kmpc_barrier(loc, thread_id);
2596   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2597   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2598   // thread_id);
2599   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2600                          getThreadID(CGF, Loc)};
2601   if (OMPRegionInfo) {
2602     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2603       llvm::Value *Result = CGF.EmitRuntimeCall(
2604           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2605                                                 OMPRTL___kmpc_cancel_barrier),
2606           Args);
2607       if (EmitChecks) {
2608         // if (__kmpc_cancel_barrier()) {
2609         //   exit from construct;
2610         // }
2611         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2612         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2613         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2614         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2615         CGF.EmitBlock(ExitBB);
2616         //   exit from construct;
2617         CodeGenFunction::JumpDest CancelDestination =
2618             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2619         CGF.EmitBranchThroughCleanup(CancelDestination);
2620         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2621       }
2622       return;
2623     }
2624   }
2625   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2626                           CGM.getModule(), OMPRTL___kmpc_barrier),
2627                       Args);
2628 }
2629 
2630 /// Map the OpenMP loop schedule to the runtime enumeration.
2631 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2632                                           bool Chunked, bool Ordered) {
2633   switch (ScheduleKind) {
2634   case OMPC_SCHEDULE_static:
2635     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2636                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2637   case OMPC_SCHEDULE_dynamic:
2638     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2639   case OMPC_SCHEDULE_guided:
2640     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2641   case OMPC_SCHEDULE_runtime:
2642     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2643   case OMPC_SCHEDULE_auto:
2644     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2645   case OMPC_SCHEDULE_unknown:
2646     assert(!Chunked && "chunk was specified but schedule kind not known");
2647     return Ordered ? OMP_ord_static : OMP_sch_static;
2648   }
2649   llvm_unreachable("Unexpected runtime schedule");
2650 }
2651 
2652 /// Map the OpenMP distribute schedule to the runtime enumeration.
2653 static OpenMPSchedType
2654 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2655   // only static is allowed for dist_schedule
2656   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2657 }
2658 
2659 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2660                                          bool Chunked) const {
2661   OpenMPSchedType Schedule =
2662       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2663   return Schedule == OMP_sch_static;
2664 }
2665 
2666 bool CGOpenMPRuntime::isStaticNonchunked(
2667     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2668   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2669   return Schedule == OMP_dist_sch_static;
2670 }
2671 
2672 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2673                                       bool Chunked) const {
2674   OpenMPSchedType Schedule =
2675       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2676   return Schedule == OMP_sch_static_chunked;
2677 }
2678 
2679 bool CGOpenMPRuntime::isStaticChunked(
2680     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2681   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2682   return Schedule == OMP_dist_sch_static_chunked;
2683 }
2684 
2685 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2686   OpenMPSchedType Schedule =
2687       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2688   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2689   return Schedule != OMP_sch_static;
2690 }
2691 
2692 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2693                                   OpenMPScheduleClauseModifier M1,
2694                                   OpenMPScheduleClauseModifier M2) {
2695   int Modifier = 0;
2696   switch (M1) {
2697   case OMPC_SCHEDULE_MODIFIER_monotonic:
2698     Modifier = OMP_sch_modifier_monotonic;
2699     break;
2700   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2701     Modifier = OMP_sch_modifier_nonmonotonic;
2702     break;
2703   case OMPC_SCHEDULE_MODIFIER_simd:
2704     if (Schedule == OMP_sch_static_chunked)
2705       Schedule = OMP_sch_static_balanced_chunked;
2706     break;
2707   case OMPC_SCHEDULE_MODIFIER_last:
2708   case OMPC_SCHEDULE_MODIFIER_unknown:
2709     break;
2710   }
2711   switch (M2) {
2712   case OMPC_SCHEDULE_MODIFIER_monotonic:
2713     Modifier = OMP_sch_modifier_monotonic;
2714     break;
2715   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2716     Modifier = OMP_sch_modifier_nonmonotonic;
2717     break;
2718   case OMPC_SCHEDULE_MODIFIER_simd:
2719     if (Schedule == OMP_sch_static_chunked)
2720       Schedule = OMP_sch_static_balanced_chunked;
2721     break;
2722   case OMPC_SCHEDULE_MODIFIER_last:
2723   case OMPC_SCHEDULE_MODIFIER_unknown:
2724     break;
2725   }
2726   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2727   // If the static schedule kind is specified or if the ordered clause is
2728   // specified, and if the nonmonotonic modifier is not specified, the effect is
2729   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2730   // modifier is specified, the effect is as if the nonmonotonic modifier is
2731   // specified.
2732   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2733     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2734           Schedule == OMP_sch_static_balanced_chunked ||
2735           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2736           Schedule == OMP_dist_sch_static_chunked ||
2737           Schedule == OMP_dist_sch_static))
2738       Modifier = OMP_sch_modifier_nonmonotonic;
2739   }
2740   return Schedule | Modifier;
2741 }
2742 
2743 void CGOpenMPRuntime::emitForDispatchInit(
2744     CodeGenFunction &CGF, SourceLocation Loc,
2745     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2746     bool Ordered, const DispatchRTInput &DispatchValues) {
2747   if (!CGF.HaveInsertPoint())
2748     return;
2749   OpenMPSchedType Schedule = getRuntimeSchedule(
2750       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2751   assert(Ordered ||
2752          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2753           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2754           Schedule != OMP_sch_static_balanced_chunked));
2755   // Call __kmpc_dispatch_init(
2756   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2757   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2758   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2759 
2760   // If the Chunk was not specified in the clause - use default value 1.
2761   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2762                                             : CGF.Builder.getIntN(IVSize, 1);
2763   llvm::Value *Args[] = {
2764       emitUpdateLocation(CGF, Loc),
2765       getThreadID(CGF, Loc),
2766       CGF.Builder.getInt32(addMonoNonMonoModifier(
2767           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2768       DispatchValues.LB,                                     // Lower
2769       DispatchValues.UB,                                     // Upper
2770       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2771       Chunk                                                  // Chunk
2772   };
2773   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2774 }
2775 
2776 static void emitForStaticInitCall(
2777     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2778     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2779     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2780     const CGOpenMPRuntime::StaticRTInput &Values) {
2781   if (!CGF.HaveInsertPoint())
2782     return;
2783 
2784   assert(!Values.Ordered);
2785   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2786          Schedule == OMP_sch_static_balanced_chunked ||
2787          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2788          Schedule == OMP_dist_sch_static ||
2789          Schedule == OMP_dist_sch_static_chunked);
2790 
2791   // Call __kmpc_for_static_init(
2792   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2793   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2794   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2795   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2796   llvm::Value *Chunk = Values.Chunk;
2797   if (Chunk == nullptr) {
2798     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2799             Schedule == OMP_dist_sch_static) &&
2800            "expected static non-chunked schedule");
2801     // If the Chunk was not specified in the clause - use default value 1.
2802     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2803   } else {
2804     assert((Schedule == OMP_sch_static_chunked ||
2805             Schedule == OMP_sch_static_balanced_chunked ||
2806             Schedule == OMP_ord_static_chunked ||
2807             Schedule == OMP_dist_sch_static_chunked) &&
2808            "expected static chunked schedule");
2809   }
2810   llvm::Value *Args[] = {
2811       UpdateLocation,
2812       ThreadId,
2813       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2814                                                   M2)), // Schedule type
2815       Values.IL.getPointer(),                           // &isLastIter
2816       Values.LB.getPointer(),                           // &LB
2817       Values.UB.getPointer(),                           // &UB
2818       Values.ST.getPointer(),                           // &Stride
2819       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2820       Chunk                                             // Chunk
2821   };
2822   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2823 }
2824 
2825 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2826                                         SourceLocation Loc,
2827                                         OpenMPDirectiveKind DKind,
2828                                         const OpenMPScheduleTy &ScheduleKind,
2829                                         const StaticRTInput &Values) {
2830   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2831       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2832   assert(isOpenMPWorksharingDirective(DKind) &&
2833          "Expected loop-based or sections-based directive.");
2834   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2835                                              isOpenMPLoopDirective(DKind)
2836                                                  ? OMP_IDENT_WORK_LOOP
2837                                                  : OMP_IDENT_WORK_SECTIONS);
2838   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2839   llvm::FunctionCallee StaticInitFunction =
2840       createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2841   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2842   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2843                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2844 }
2845 
2846 void CGOpenMPRuntime::emitDistributeStaticInit(
2847     CodeGenFunction &CGF, SourceLocation Loc,
2848     OpenMPDistScheduleClauseKind SchedKind,
2849     const CGOpenMPRuntime::StaticRTInput &Values) {
2850   OpenMPSchedType ScheduleNum =
2851       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2852   llvm::Value *UpdatedLocation =
2853       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2854   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2855   llvm::FunctionCallee StaticInitFunction;
2856   bool isGPUDistribute =
2857       CGM.getLangOpts().OpenMPIsDevice &&
2858       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2859   StaticInitFunction = createForStaticInitFunction(
2860       Values.IVSize, Values.IVSigned, isGPUDistribute);
2861 
2862   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2863                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2864                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2865 }
2866 
2867 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2868                                           SourceLocation Loc,
2869                                           OpenMPDirectiveKind DKind) {
2870   if (!CGF.HaveInsertPoint())
2871     return;
2872   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2873   llvm::Value *Args[] = {
2874       emitUpdateLocation(CGF, Loc,
2875                          isOpenMPDistributeDirective(DKind)
2876                              ? OMP_IDENT_WORK_DISTRIBUTE
2877                              : isOpenMPLoopDirective(DKind)
2878                                    ? OMP_IDENT_WORK_LOOP
2879                                    : OMP_IDENT_WORK_SECTIONS),
2880       getThreadID(CGF, Loc)};
2881   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2882   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2883       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2884     CGF.EmitRuntimeCall(
2885         OMPBuilder.getOrCreateRuntimeFunction(
2886             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2887         Args);
2888   else
2889     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2890                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2891                         Args);
2892 }
2893 
2894 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2895                                                  SourceLocation Loc,
2896                                                  unsigned IVSize,
2897                                                  bool IVSigned) {
2898   if (!CGF.HaveInsertPoint())
2899     return;
2900   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2901   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2902   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2903 }
2904 
2905 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2906                                           SourceLocation Loc, unsigned IVSize,
2907                                           bool IVSigned, Address IL,
2908                                           Address LB, Address UB,
2909                                           Address ST) {
2910   // Call __kmpc_dispatch_next(
2911   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2912   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2913   //          kmp_int[32|64] *p_stride);
2914   llvm::Value *Args[] = {
2915       emitUpdateLocation(CGF, Loc),
2916       getThreadID(CGF, Loc),
2917       IL.getPointer(), // &isLastIter
2918       LB.getPointer(), // &Lower
2919       UB.getPointer(), // &Upper
2920       ST.getPointer()  // &Stride
2921   };
2922   llvm::Value *Call =
2923       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2924   return CGF.EmitScalarConversion(
2925       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2926       CGF.getContext().BoolTy, Loc);
2927 }
2928 
2929 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2930                                            llvm::Value *NumThreads,
2931                                            SourceLocation Loc) {
2932   if (!CGF.HaveInsertPoint())
2933     return;
2934   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2935   llvm::Value *Args[] = {
2936       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2937       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2938   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2939                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2940                       Args);
2941 }
2942 
2943 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2944                                          ProcBindKind ProcBind,
2945                                          SourceLocation Loc) {
2946   if (!CGF.HaveInsertPoint())
2947     return;
2948   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2949   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2950   llvm::Value *Args[] = {
2951       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2952       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2953   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2954                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2955                       Args);
2956 }
2957 
2958 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2959                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2960   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2961     OMPBuilder.createFlush(CGF.Builder);
2962   } else {
2963     if (!CGF.HaveInsertPoint())
2964       return;
2965     // Build call void __kmpc_flush(ident_t *loc)
2966     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2967                             CGM.getModule(), OMPRTL___kmpc_flush),
2968                         emitUpdateLocation(CGF, Loc));
2969   }
2970 }
2971 
namespace {
/// Field indexes for the kmp_task_t type. The values are consecutive and
/// start at zero, in declaration order.
enum KmpTaskTFields {
  /// Shared variables list.
  KmpTaskTShareds = 0,
  /// Task routine.
  KmpTaskTRoutine = 1,
  /// Partition id (untied tasks).
  KmpTaskTPartId = 2,
  /// Function that calls the destructors of private variables.
  Data1 = 3,
  /// Priority of the task.
  Data2 = 4,
  /// Lower bound (taskloops only).
  KmpTaskTLowerBound = 5,
  /// Upper bound (taskloops only).
  KmpTaskTUpperBound = 6,
  /// Stride (taskloops only).
  KmpTaskTStride = 7,
  /// Is-last-iteration flag (taskloops only).
  KmpTaskTLastIter = 8,
  /// Reduction data (taskloops only).
  KmpTaskTReductions = 9,
};
} // anonymous namespace
2997 
2998 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2999   return OffloadEntriesTargetRegion.empty() &&
3000          OffloadEntriesDeviceGlobalVar.empty();
3001 }
3002 
3003 /// Initialize target region entry.
3004 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3005     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3006                                     StringRef ParentName, unsigned LineNum,
3007                                     unsigned Order) {
3008   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3009                                              "only required for the device "
3010                                              "code generation.");
3011   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3012       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3013                                    OMPTargetRegionEntryTargetRegion);
3014   ++OffloadingEntriesNum;
3015 }
3016 
3017 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3018     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3019                                   StringRef ParentName, unsigned LineNum,
3020                                   llvm::Constant *Addr, llvm::Constant *ID,
3021                                   OMPTargetRegionEntryKind Flags) {
3022   // If we are emitting code for a target, the entry is already initialized,
3023   // only has to be registered.
3024   if (CGM.getLangOpts().OpenMPIsDevice) {
3025     // This could happen if the device compilation is invoked standalone.
3026     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
3027       return;
3028     auto &Entry =
3029         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3030     Entry.setAddress(Addr);
3031     Entry.setID(ID);
3032     Entry.setFlags(Flags);
3033   } else {
3034     if (Flags ==
3035             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3036         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3037                                  /*IgnoreAddressId*/ true))
3038       return;
3039     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3040            "Target region entry already registered!");
3041     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3042     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3043     ++OffloadingEntriesNum;
3044   }
3045 }
3046 
3047 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3048     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3049     bool IgnoreAddressId) const {
3050   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3051   if (PerDevice == OffloadEntriesTargetRegion.end())
3052     return false;
3053   auto PerFile = PerDevice->second.find(FileID);
3054   if (PerFile == PerDevice->second.end())
3055     return false;
3056   auto PerParentName = PerFile->second.find(ParentName);
3057   if (PerParentName == PerFile->second.end())
3058     return false;
3059   auto PerLine = PerParentName->second.find(LineNum);
3060   if (PerLine == PerParentName->second.end())
3061     return false;
3062   // Fail if this entry is already registered.
3063   if (!IgnoreAddressId &&
3064       (PerLine->second.getAddress() || PerLine->second.getID()))
3065     return false;
3066   return true;
3067 }
3068 
3069 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3070     const OffloadTargetRegionEntryInfoActTy &Action) {
3071   // Scan all target region entries and perform the provided action.
3072   for (const auto &D : OffloadEntriesTargetRegion)
3073     for (const auto &F : D.second)
3074       for (const auto &P : F.second)
3075         for (const auto &L : P.second)
3076           Action(D.first, F.first, P.first(), L.first, L.second);
3077 }
3078 
3079 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3080     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3081                                        OMPTargetGlobalVarEntryKind Flags,
3082                                        unsigned Order) {
3083   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3084                                              "only required for the device "
3085                                              "code generation.");
3086   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3087   ++OffloadingEntriesNum;
3088 }
3089 
3090 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3091     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3092                                      CharUnits VarSize,
3093                                      OMPTargetGlobalVarEntryKind Flags,
3094                                      llvm::GlobalValue::LinkageTypes Linkage) {
3095   if (CGM.getLangOpts().OpenMPIsDevice) {
3096     // This could happen if the device compilation is invoked standalone.
3097     if (!hasDeviceGlobalVarEntryInfo(VarName))
3098       return;
3099     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3100     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3101       if (Entry.getVarSize().isZero()) {
3102         Entry.setVarSize(VarSize);
3103         Entry.setLinkage(Linkage);
3104       }
3105       return;
3106     }
3107     Entry.setVarSize(VarSize);
3108     Entry.setLinkage(Linkage);
3109     Entry.setAddress(Addr);
3110   } else {
3111     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3112       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3113       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3114              "Entry not initialized!");
3115       if (Entry.getVarSize().isZero()) {
3116         Entry.setVarSize(VarSize);
3117         Entry.setLinkage(Linkage);
3118       }
3119       return;
3120     }
3121     OffloadEntriesDeviceGlobalVar.try_emplace(
3122         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3123     ++OffloadingEntriesNum;
3124   }
3125 }
3126 
3127 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3128     actOnDeviceGlobalVarEntriesInfo(
3129         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3130   // Scan all target region entries and perform the provided action.
3131   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3132     Action(E.getKey(), E.getValue());
3133 }
3134 
3135 void CGOpenMPRuntime::createOffloadEntry(
3136     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3137     llvm::GlobalValue::LinkageTypes Linkage) {
3138   StringRef Name = Addr->getName();
3139   llvm::Module &M = CGM.getModule();
3140   llvm::LLVMContext &C = M.getContext();
3141 
3142   // Create constant string with the name.
3143   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3144 
3145   std::string StringName = getName({"omp_offloading", "entry_name"});
3146   auto *Str = new llvm::GlobalVariable(
3147       M, StrPtrInit->getType(), /*isConstant=*/true,
3148       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3149   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3150 
3151   llvm::Constant *Data[] = {
3152       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3153       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3154       llvm::ConstantInt::get(CGM.SizeTy, Size),
3155       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3156       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3157   std::string EntryName = getName({"omp_offloading", "entry", ""});
3158   llvm::GlobalVariable *Entry = createGlobalStruct(
3159       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3160       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3161 
3162   // The entry has to be created in the section the linker expects it to be.
3163   Entry->setSection("omp_offloading_entries");
3164 }
3165 
/// Emits the "omp_offload.info" named metadata for every registered target
/// region and declare target global variable entry and then creates the
/// corresponding offload entry globals in entry creation order.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Metadata is generated for target regions and for declare target global
  // variables.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Both arrays are indexed by the order number of an entry. OrderedEntries
  // holds (entry, source location, name); ParentFunctions holds the parent
  // function name of target region entries.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a source location for diagnostics by searching the known
        // files for one whose unique ID matches (DeviceID, FileID). Loc stays
        // default-constructed when no file matches. Note that the loop-local
        // end iterator 'E' shadows the entry parameter 'E' inside the loop
        // only.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        // No source location is recorded for global variable entries.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit the offload entries in creation order, diagnosing entries that were
  // recorded but never completely registered.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      // Target region entries are emitted with a size of zero.
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      // Inside the switch, 'continue' skips this entry altogether while
      // 'break' falls through to the createOffloadEntry call below.
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // No entry is emitted on the device when unified shared memory is
        // required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // Expected state: no address on the device, an address on the host.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        // With asserts enabled the check above already guarantees an address
        // on the host; this diagnostic covers builds without asserts. It is
        // reported without a source location or variable name.
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For global variables the address doubles as the entry ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3339 
3340 /// Loads all the offload entries information from the host IR
3341 /// metadata.
3342 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3343   // If we are in target mode, load the metadata from the host IR. This code has
3344   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3345 
3346   if (!CGM.getLangOpts().OpenMPIsDevice)
3347     return;
3348 
3349   if (CGM.getLangOpts().OMPHostIRFile.empty())
3350     return;
3351 
3352   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3353   if (auto EC = Buf.getError()) {
3354     CGM.getDiags().Report(diag::err_cannot_open_file)
3355         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3356     return;
3357   }
3358 
3359   llvm::LLVMContext C;
3360   auto ME = expectedToErrorOrAndEmitErrors(
3361       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3362 
3363   if (auto EC = ME.getError()) {
3364     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3365         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3366     CGM.getDiags().Report(DiagID)
3367         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3368     return;
3369   }
3370 
3371   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3372   if (!MD)
3373     return;
3374 
3375   for (llvm::MDNode *MN : MD->operands()) {
3376     auto &&GetMDInt = [MN](unsigned Idx) {
3377       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3378       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3379     };
3380 
3381     auto &&GetMDString = [MN](unsigned Idx) {
3382       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3383       return V->getString();
3384     };
3385 
3386     switch (GetMDInt(0)) {
3387     default:
3388       llvm_unreachable("Unexpected metadata!");
3389       break;
3390     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3391         OffloadingEntryInfoTargetRegion:
3392       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3393           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3394           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3395           /*Order=*/GetMDInt(5));
3396       break;
3397     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3398         OffloadingEntryInfoDeviceGlobalVar:
3399       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3400           /*MangledName=*/GetMDString(1),
3401           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3402               /*Flags=*/GetMDInt(2)),
3403           /*Order=*/GetMDInt(3));
3404       break;
3405     }
3406   }
3407 }
3408 
3409 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3410   if (!KmpRoutineEntryPtrTy) {
3411     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3412     ASTContext &C = CGM.getContext();
3413     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3414     FunctionProtoType::ExtProtoInfo EPI;
3415     KmpRoutineEntryPtrQTy = C.getPointerType(
3416         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3417     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3418   }
3419 }
3420 
3421 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3422   // Make sure the type of the entry is already created. This is the type we
3423   // have to create:
3424   // struct __tgt_offload_entry{
3425   //   void      *addr;       // Pointer to the offload entry info.
3426   //                          // (function or global)
3427   //   char      *name;       // Name of the function or global.
3428   //   size_t     size;       // Size of the entry info (0 if it a function).
3429   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3430   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3431   // };
3432   if (TgtOffloadEntryQTy.isNull()) {
3433     ASTContext &C = CGM.getContext();
3434     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3435     RD->startDefinition();
3436     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3437     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3438     addFieldToRecordDecl(C, RD, C.getSizeType());
3439     addFieldToRecordDecl(
3440         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3441     addFieldToRecordDecl(
3442         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3443     RD->completeDefinition();
3444     RD->addAttr(PackedAttr::CreateImplicit(C));
3445     TgtOffloadEntryQTy = C.getRecordType(RD);
3446   }
3447   return TgtOffloadEntryQTy;
3448 }
3449 
3450 namespace {
3451 struct PrivateHelpersTy {
3452   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3453                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3454       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3455         PrivateElemInit(PrivateElemInit) {}
3456   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3457   const Expr *OriginalRef = nullptr;
3458   const VarDecl *Original = nullptr;
3459   const VarDecl *PrivateCopy = nullptr;
3460   const VarDecl *PrivateElemInit = nullptr;
3461   bool isLocalPrivate() const {
3462     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3463   }
3464 };
3465 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3466 } // anonymous namespace
3467 
3468 static bool isAllocatableDecl(const VarDecl *VD) {
3469   const VarDecl *CVD = VD->getCanonicalDecl();
3470   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3471     return false;
3472   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3473   // Use the default allocation.
3474   return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3475            !AA->getAllocator());
3476 }
3477 
3478 static RecordDecl *
3479 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3480   if (!Privates.empty()) {
3481     ASTContext &C = CGM.getContext();
3482     // Build struct .kmp_privates_t. {
3483     //         /*  private vars  */
3484     //       };
3485     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3486     RD->startDefinition();
3487     for (const auto &Pair : Privates) {
3488       const VarDecl *VD = Pair.second.Original;
3489       QualType Type = VD->getType().getNonReferenceType();
3490       // If the private variable is a local variable with lvalue ref type,
3491       // allocate the pointer instead of the pointee type.
3492       if (Pair.second.isLocalPrivate()) {
3493         if (VD->getType()->isLValueReferenceType())
3494           Type = C.getPointerType(Type);
3495         if (isAllocatableDecl(VD))
3496           Type = C.getPointerType(Type);
3497       }
3498       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3499       if (VD->hasAttrs()) {
3500         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3501              E(VD->getAttrs().end());
3502              I != E; ++I)
3503           FD->addAttr(*I);
3504       }
3505     }
3506     RD->completeDefinition();
3507     return RD;
3508   }
3509   return nullptr;
3510 }
3511 
3512 static RecordDecl *
3513 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3514                          QualType KmpInt32Ty,
3515                          QualType KmpRoutineEntryPointerQTy) {
3516   ASTContext &C = CGM.getContext();
3517   // Build struct kmp_task_t {
3518   //         void *              shareds;
3519   //         kmp_routine_entry_t routine;
3520   //         kmp_int32           part_id;
3521   //         kmp_cmplrdata_t data1;
3522   //         kmp_cmplrdata_t data2;
3523   // For taskloops additional fields:
3524   //         kmp_uint64          lb;
3525   //         kmp_uint64          ub;
3526   //         kmp_int64           st;
3527   //         kmp_int32           liter;
3528   //         void *              reductions;
3529   //       };
3530   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3531   UD->startDefinition();
3532   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3533   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3534   UD->completeDefinition();
3535   QualType KmpCmplrdataTy = C.getRecordType(UD);
3536   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3537   RD->startDefinition();
3538   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3539   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3540   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3541   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3542   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3543   if (isOpenMPTaskLoopDirective(Kind)) {
3544     QualType KmpUInt64Ty =
3545         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3546     QualType KmpInt64Ty =
3547         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3548     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3549     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3550     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3551     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3552     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3553   }
3554   RD->completeDefinition();
3555   return RD;
3556 }
3557 
3558 static RecordDecl *
3559 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3560                                      ArrayRef<PrivateDataTy> Privates) {
3561   ASTContext &C = CGM.getContext();
3562   // Build struct kmp_task_t_with_privates {
3563   //         kmp_task_t task_data;
3564   //         .kmp_privates_t. privates;
3565   //       };
3566   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3567   RD->startDefinition();
3568   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3569   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3570     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3571   RD->completeDefinition();
3572   return RD;
3573 }
3574 
3575 /// Emit a proxy function which accepts kmp_task_t as the second
3576 /// argument.
3577 /// \code
3578 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3579 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3580 ///   For taskloops:
3581 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3582 ///   tt->reductions, tt->shareds);
3583 ///   return 0;
3584 /// }
3585 /// \endcode
3586 static llvm::Function *
3587 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3588                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3589                       QualType KmpTaskTWithPrivatesPtrQTy,
3590                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3591                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3592                       llvm::Value *TaskPrivatesMap) {
3593   ASTContext &C = CGM.getContext();
3594   FunctionArgList Args;
3595   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3596                             ImplicitParamDecl::Other);
3597   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3598                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3599                                 ImplicitParamDecl::Other);
3600   Args.push_back(&GtidArg);
3601   Args.push_back(&TaskTypeArg);
3602   const auto &TaskEntryFnInfo =
3603       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3604   llvm::FunctionType *TaskEntryTy =
3605       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3606   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3607   auto *TaskEntry = llvm::Function::Create(
3608       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3609   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3610   TaskEntry->setDoesNotRecurse();
3611   CodeGenFunction CGF(CGM);
3612   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3613                     Loc, Loc);
3614 
3615   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3616   // tt,
3617   // For taskloops:
3618   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3619   // tt->task_data.shareds);
3620   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3621       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3622   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3623       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3624       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3625   const auto *KmpTaskTWithPrivatesQTyRD =
3626       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3627   LValue Base =
3628       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3629   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3630   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3631   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3632   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3633 
3634   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3635   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3636   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3637       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3638       CGF.ConvertTypeForMem(SharedsPtrTy));
3639 
3640   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3641   llvm::Value *PrivatesParam;
3642   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3643     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3644     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3645         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3646   } else {
3647     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3648   }
3649 
3650   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3651                                TaskPrivatesMap,
3652                                CGF.Builder
3653                                    .CreatePointerBitCastOrAddrSpaceCast(
3654                                        TDBase.getAddress(CGF), CGF.VoidPtrTy)
3655                                    .getPointer()};
3656   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3657                                           std::end(CommonArgs));
3658   if (isOpenMPTaskLoopDirective(Kind)) {
3659     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3660     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3661     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3662     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3663     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3664     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3665     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3666     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3667     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3668     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3669     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3670     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3671     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3672     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3673     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3674     CallArgs.push_back(LBParam);
3675     CallArgs.push_back(UBParam);
3676     CallArgs.push_back(StParam);
3677     CallArgs.push_back(LIParam);
3678     CallArgs.push_back(RParam);
3679   }
3680   CallArgs.push_back(SharedsParam);
3681 
3682   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3683                                                   CallArgs);
3684   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3685                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3686   CGF.FinishFunction();
3687   return TaskEntry;
3688 }
3689 
3690 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3691                                             SourceLocation Loc,
3692                                             QualType KmpInt32Ty,
3693                                             QualType KmpTaskTWithPrivatesPtrQTy,
3694                                             QualType KmpTaskTWithPrivatesQTy) {
3695   ASTContext &C = CGM.getContext();
3696   FunctionArgList Args;
3697   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3698                             ImplicitParamDecl::Other);
3699   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3700                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3701                                 ImplicitParamDecl::Other);
3702   Args.push_back(&GtidArg);
3703   Args.push_back(&TaskTypeArg);
3704   const auto &DestructorFnInfo =
3705       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3706   llvm::FunctionType *DestructorFnTy =
3707       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3708   std::string Name =
3709       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3710   auto *DestructorFn =
3711       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3712                              Name, &CGM.getModule());
3713   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3714                                     DestructorFnInfo);
3715   DestructorFn->setDoesNotRecurse();
3716   CodeGenFunction CGF(CGM);
3717   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3718                     Args, Loc, Loc);
3719 
3720   LValue Base = CGF.EmitLoadOfPointerLValue(
3721       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3722       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3723   const auto *KmpTaskTWithPrivatesQTyRD =
3724       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3725   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3726   Base = CGF.EmitLValueForField(Base, *FI);
3727   for (const auto *Field :
3728        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3729     if (QualType::DestructionKind DtorKind =
3730             Field->getType().isDestructedType()) {
3731       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3732       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3733     }
3734   }
3735   CGF.FinishFunction();
3736   return DestructorFn;
3737 }
3738 
3739 /// Emit a privates mapping function for correct handling of private and
3740 /// firstprivate variables.
3741 /// \code
3742 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3743 /// **noalias priv1,...,  <tyn> **noalias privn) {
3744 ///   *priv1 = &.privates.priv1;
3745 ///   ...;
3746 ///   *privn = &.privates.privn;
3747 /// }
3748 /// \endcode
3749 static llvm::Value *
3750 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3751                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3752                                ArrayRef<PrivateDataTy> Privates) {
3753   ASTContext &C = CGM.getContext();
3754   FunctionArgList Args;
3755   ImplicitParamDecl TaskPrivatesArg(
3756       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3757       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3758       ImplicitParamDecl::Other);
3759   Args.push_back(&TaskPrivatesArg);
3760   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3761   unsigned Counter = 1;
3762   for (const Expr *E : Data.PrivateVars) {
3763     Args.push_back(ImplicitParamDecl::Create(
3764         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3765         C.getPointerType(C.getPointerType(E->getType()))
3766             .withConst()
3767             .withRestrict(),
3768         ImplicitParamDecl::Other));
3769     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3770     PrivateVarsPos[VD] = Counter;
3771     ++Counter;
3772   }
3773   for (const Expr *E : Data.FirstprivateVars) {
3774     Args.push_back(ImplicitParamDecl::Create(
3775         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3776         C.getPointerType(C.getPointerType(E->getType()))
3777             .withConst()
3778             .withRestrict(),
3779         ImplicitParamDecl::Other));
3780     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3781     PrivateVarsPos[VD] = Counter;
3782     ++Counter;
3783   }
3784   for (const Expr *E : Data.LastprivateVars) {
3785     Args.push_back(ImplicitParamDecl::Create(
3786         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3787         C.getPointerType(C.getPointerType(E->getType()))
3788             .withConst()
3789             .withRestrict(),
3790         ImplicitParamDecl::Other));
3791     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3792     PrivateVarsPos[VD] = Counter;
3793     ++Counter;
3794   }
3795   for (const VarDecl *VD : Data.PrivateLocals) {
3796     QualType Ty = VD->getType().getNonReferenceType();
3797     if (VD->getType()->isLValueReferenceType())
3798       Ty = C.getPointerType(Ty);
3799     if (isAllocatableDecl(VD))
3800       Ty = C.getPointerType(Ty);
3801     Args.push_back(ImplicitParamDecl::Create(
3802         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3803         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3804         ImplicitParamDecl::Other));
3805     PrivateVarsPos[VD] = Counter;
3806     ++Counter;
3807   }
3808   const auto &TaskPrivatesMapFnInfo =
3809       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3810   llvm::FunctionType *TaskPrivatesMapTy =
3811       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3812   std::string Name =
3813       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3814   auto *TaskPrivatesMap = llvm::Function::Create(
3815       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3816       &CGM.getModule());
3817   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3818                                     TaskPrivatesMapFnInfo);
3819   if (CGM.getLangOpts().Optimize) {
3820     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3821     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3822     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3823   }
3824   CodeGenFunction CGF(CGM);
3825   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3826                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3827 
3828   // *privi = &.privates.privi;
3829   LValue Base = CGF.EmitLoadOfPointerLValue(
3830       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3831       TaskPrivatesArg.getType()->castAs<PointerType>());
3832   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3833   Counter = 0;
3834   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3835     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3836     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3837     LValue RefLVal =
3838         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3839     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3840         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3841     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3842     ++Counter;
3843   }
3844   CGF.FinishFunction();
3845   return TaskPrivatesMap;
3846 }
3847 
3848 /// Emit initialization for private variables in task-based directives.
3849 static void emitPrivatesInit(CodeGenFunction &CGF,
3850                              const OMPExecutableDirective &D,
3851                              Address KmpTaskSharedsPtr, LValue TDBase,
3852                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3853                              QualType SharedsTy, QualType SharedsPtrTy,
3854                              const OMPTaskDataTy &Data,
3855                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3856   ASTContext &C = CGF.getContext();
3857   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3858   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3859   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3860                                  ? OMPD_taskloop
3861                                  : OMPD_task;
3862   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3863   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3864   LValue SrcBase;
3865   bool IsTargetTask =
3866       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3867       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3868   // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3869   // PointersArray, SizesArray, and MappersArray. The original variables for
3870   // these arrays are not captured and we get their addresses explicitly.
3871   if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3872       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3873     SrcBase = CGF.MakeAddrLValue(
3874         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3875             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
3876         SharedsTy);
3877   }
3878   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3879   for (const PrivateDataTy &Pair : Privates) {
3880     // Do not initialize private locals.
3881     if (Pair.second.isLocalPrivate()) {
3882       ++FI;
3883       continue;
3884     }
3885     const VarDecl *VD = Pair.second.PrivateCopy;
3886     const Expr *Init = VD->getAnyInitializer();
3887     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3888                              !CGF.isTrivialInitializer(Init)))) {
3889       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3890       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3891         const VarDecl *OriginalVD = Pair.second.Original;
3892         // Check if the variable is the target-based BasePointersArray,
3893         // PointersArray, SizesArray, or MappersArray.
3894         LValue SharedRefLValue;
3895         QualType Type = PrivateLValue.getType();
3896         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3897         if (IsTargetTask && !SharedField) {
3898           assert(isa<ImplicitParamDecl>(OriginalVD) &&
3899                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3900                  cast<CapturedDecl>(OriginalVD->getDeclContext())
3901                          ->getNumParams() == 0 &&
3902                  isa<TranslationUnitDecl>(
3903                      cast<CapturedDecl>(OriginalVD->getDeclContext())
3904                          ->getDeclContext()) &&
3905                  "Expected artificial target data variable.");
3906           SharedRefLValue =
3907               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3908         } else if (ForDup) {
3909           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3910           SharedRefLValue = CGF.MakeAddrLValue(
3911               SharedRefLValue.getAddress(CGF).withAlignment(
3912                   C.getDeclAlign(OriginalVD)),
3913               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3914               SharedRefLValue.getTBAAInfo());
3915         } else if (CGF.LambdaCaptureFields.count(
3916                        Pair.second.Original->getCanonicalDecl()) > 0 ||
3917                    isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3918           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3919         } else {
3920           // Processing for implicitly captured variables.
3921           InlinedOpenMPRegionRAII Region(
3922               CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3923               /*HasCancel=*/false, /*NoInheritance=*/true);
3924           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3925         }
3926         if (Type->isArrayType()) {
3927           // Initialize firstprivate array.
3928           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3929             // Perform simple memcpy.
3930             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3931           } else {
3932             // Initialize firstprivate array using element-by-element
3933             // initialization.
3934             CGF.EmitOMPAggregateAssign(
3935                 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3936                 Type,
3937                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3938                                                   Address SrcElement) {
3939                   // Clean up any temporaries needed by the initialization.
3940                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
3941                   InitScope.addPrivate(
3942                       Elem, [SrcElement]() -> Address { return SrcElement; });
3943                   (void)InitScope.Privatize();
3944                   // Emit initialization for single element.
3945                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3946                       CGF, &CapturesInfo);
3947                   CGF.EmitAnyExprToMem(Init, DestElement,
3948                                        Init->getType().getQualifiers(),
3949                                        /*IsInitializer=*/false);
3950                 });
3951           }
3952         } else {
3953           CodeGenFunction::OMPPrivateScope InitScope(CGF);
3954           InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
3955             return SharedRefLValue.getAddress(CGF);
3956           });
3957           (void)InitScope.Privatize();
3958           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3959           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3960                              /*capturedByInit=*/false);
3961         }
3962       } else {
3963         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3964       }
3965     }
3966     ++FI;
3967   }
3968 }
3969 
3970 /// Check if duplication function is required for taskloops.
3971 static bool checkInitIsRequired(CodeGenFunction &CGF,
3972                                 ArrayRef<PrivateDataTy> Privates) {
3973   bool InitRequired = false;
3974   for (const PrivateDataTy &Pair : Privates) {
3975     if (Pair.second.isLocalPrivate())
3976       continue;
3977     const VarDecl *VD = Pair.second.PrivateCopy;
3978     const Expr *Init = VD->getAnyInitializer();
3979     InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3980                                     !CGF.isTrivialInitializer(Init));
3981     if (InitRequired)
3982       break;
3983   }
3984   return InitRequired;
3985 }
3986 
3987 
3988 /// Emit task_dup function (for initialization of
3989 /// private/firstprivate/lastprivate vars and last_iter flag)
3990 /// \code
3991 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3992 /// lastpriv) {
3993 /// // setup lastprivate flag
3994 ///    task_dst->last = lastpriv;
3995 /// // could be constructor calls here...
3996 /// }
3997 /// \endcode
3998 static llvm::Value *
3999 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4000                     const OMPExecutableDirective &D,
4001                     QualType KmpTaskTWithPrivatesPtrQTy,
4002                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4003                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4004                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4005                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4006   ASTContext &C = CGM.getContext();
4007   FunctionArgList Args;
4008   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4009                            KmpTaskTWithPrivatesPtrQTy,
4010                            ImplicitParamDecl::Other);
4011   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4012                            KmpTaskTWithPrivatesPtrQTy,
4013                            ImplicitParamDecl::Other);
4014   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4015                                 ImplicitParamDecl::Other);
4016   Args.push_back(&DstArg);
4017   Args.push_back(&SrcArg);
4018   Args.push_back(&LastprivArg);
4019   const auto &TaskDupFnInfo =
4020       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4021   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4022   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4023   auto *TaskDup = llvm::Function::Create(
4024       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4025   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4026   TaskDup->setDoesNotRecurse();
4027   CodeGenFunction CGF(CGM);
4028   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4029                     Loc);
4030 
4031   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4032       CGF.GetAddrOfLocalVar(&DstArg),
4033       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4034   // task_dst->liter = lastpriv;
4035   if (WithLastIter) {
4036     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4037     LValue Base = CGF.EmitLValueForField(
4038         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4039     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4040     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4041         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4042     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4043   }
4044 
4045   // Emit initial values for private copies (if any).
4046   assert(!Privates.empty());
4047   Address KmpTaskSharedsPtr = Address::invalid();
4048   if (!Data.FirstprivateVars.empty()) {
4049     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4050         CGF.GetAddrOfLocalVar(&SrcArg),
4051         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4052     LValue Base = CGF.EmitLValueForField(
4053         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4054     KmpTaskSharedsPtr = Address::deprecated(
4055         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4056                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4057                                                   KmpTaskTShareds)),
4058                              Loc),
4059         CGM.getNaturalTypeAlignment(SharedsTy));
4060   }
4061   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4062                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4063   CGF.FinishFunction();
4064   return TaskDup;
4065 }
4066 
4067 /// Checks if destructor function is required to be generated.
4068 /// \return true if cleanups are required, false otherwise.
4069 static bool
4070 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4071                          ArrayRef<PrivateDataTy> Privates) {
4072   for (const PrivateDataTy &P : Privates) {
4073     if (P.second.isLocalPrivate())
4074       continue;
4075     QualType Ty = P.second.Original->getType().getNonReferenceType();
4076     if (Ty.isDestructedType())
4077       return true;
4078   }
4079   return false;
4080 }
4081 
4082 namespace {
4083 /// Loop generator for OpenMP iterator expression.
4084 class OMPIteratorGeneratorScope final
4085     : public CodeGenFunction::OMPPrivateScope {
4086   CodeGenFunction &CGF;
4087   const OMPIteratorExpr *E = nullptr;
4088   SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
4089   SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
4090   OMPIteratorGeneratorScope() = delete;
4091   OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
4092 
4093 public:
4094   OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
4095       : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
4096     if (!E)
4097       return;
4098     SmallVector<llvm::Value *, 4> Uppers;
4099     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4100       Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
4101       const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
4102       addPrivate(VD, [&CGF, VD]() {
4103         return CGF.CreateMemTemp(VD->getType(), VD->getName());
4104       });
4105       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4106       addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
4107         return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
4108                                  "counter.addr");
4109       });
4110     }
4111     Privatize();
4112 
4113     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4114       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4115       LValue CLVal =
4116           CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
4117                              HelperData.CounterVD->getType());
4118       // Counter = 0;
4119       CGF.EmitStoreOfScalar(
4120           llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
4121           CLVal);
4122       CodeGenFunction::JumpDest &ContDest =
4123           ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
4124       CodeGenFunction::JumpDest &ExitDest =
4125           ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
4126       // N = <number-of_iterations>;
4127       llvm::Value *N = Uppers[I];
4128       // cont:
4129       // if (Counter < N) goto body; else goto exit;
4130       CGF.EmitBlock(ContDest.getBlock());
4131       auto *CVal =
4132           CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
4133       llvm::Value *Cmp =
4134           HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
4135               ? CGF.Builder.CreateICmpSLT(CVal, N)
4136               : CGF.Builder.CreateICmpULT(CVal, N);
4137       llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4138       CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4139       // body:
4140       CGF.EmitBlock(BodyBB);
4141       // Iteri = Begini + Counter * Stepi;
4142       CGF.EmitIgnoredExpr(HelperData.Update);
4143     }
4144   }
4145   ~OMPIteratorGeneratorScope() {
4146     if (!E)
4147       return;
4148     for (unsigned I = E->numOfIterators(); I > 0; --I) {
4149       // Counter = Counter + 1;
4150       const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4151       CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4152       // goto cont;
4153       CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4154       // exit:
4155       CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4156     }
4157   }
4158 };
4159 } // namespace
4160 
4161 static std::pair<llvm::Value *, llvm::Value *>
4162 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4163   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4164   llvm::Value *Addr;
4165   if (OASE) {
4166     const Expr *Base = OASE->getBase();
4167     Addr = CGF.EmitScalarExpr(Base);
4168   } else {
4169     Addr = CGF.EmitLValue(E).getPointer(CGF);
4170   }
4171   llvm::Value *SizeVal;
4172   QualType Ty = E->getType();
4173   if (OASE) {
4174     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4175     for (const Expr *SE : OASE->getDimensions()) {
4176       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4177       Sz = CGF.EmitScalarConversion(
4178           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4179       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4180     }
4181   } else if (const auto *ASE =
4182                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4183     LValue UpAddrLVal =
4184         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4185     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4186     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4187         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4188     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4189     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4190     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4191   } else {
4192     SizeVal = CGF.getTypeSize(Ty);
4193   }
4194   return std::make_pair(Addr, SizeVal);
4195 }
4196 
4197 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4198 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4199   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4200   if (KmpTaskAffinityInfoTy.isNull()) {
4201     RecordDecl *KmpAffinityInfoRD =
4202         C.buildImplicitRecord("kmp_task_affinity_info_t");
4203     KmpAffinityInfoRD->startDefinition();
4204     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4205     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4206     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4207     KmpAffinityInfoRD->completeDefinition();
4208     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4209   }
4210 }
4211 
4212 CGOpenMPRuntime::TaskResultTy
4213 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4214                               const OMPExecutableDirective &D,
4215                               llvm::Function *TaskFunction, QualType SharedsTy,
4216                               Address Shareds, const OMPTaskDataTy &Data) {
4217   ASTContext &C = CGM.getContext();
4218   llvm::SmallVector<PrivateDataTy, 4> Privates;
4219   // Aggregate privates and sort them by the alignment.
4220   const auto *I = Data.PrivateCopies.begin();
4221   for (const Expr *E : Data.PrivateVars) {
4222     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4223     Privates.emplace_back(
4224         C.getDeclAlign(VD),
4225         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4226                          /*PrivateElemInit=*/nullptr));
4227     ++I;
4228   }
4229   I = Data.FirstprivateCopies.begin();
4230   const auto *IElemInitRef = Data.FirstprivateInits.begin();
4231   for (const Expr *E : Data.FirstprivateVars) {
4232     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4233     Privates.emplace_back(
4234         C.getDeclAlign(VD),
4235         PrivateHelpersTy(
4236             E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4237             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4238     ++I;
4239     ++IElemInitRef;
4240   }
4241   I = Data.LastprivateCopies.begin();
4242   for (const Expr *E : Data.LastprivateVars) {
4243     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4244     Privates.emplace_back(
4245         C.getDeclAlign(VD),
4246         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4247                          /*PrivateElemInit=*/nullptr));
4248     ++I;
4249   }
4250   for (const VarDecl *VD : Data.PrivateLocals) {
4251     if (isAllocatableDecl(VD))
4252       Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
4253     else
4254       Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
4255   }
4256   llvm::stable_sort(Privates,
4257                     [](const PrivateDataTy &L, const PrivateDataTy &R) {
4258                       return L.first > R.first;
4259                     });
4260   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4261   // Build type kmp_routine_entry_t (if not built yet).
4262   emitKmpRoutineEntryT(KmpInt32Ty);
4263   // Build type kmp_task_t (if not built yet).
4264   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4265     if (SavedKmpTaskloopTQTy.isNull()) {
4266       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4267           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4268     }
4269     KmpTaskTQTy = SavedKmpTaskloopTQTy;
4270   } else {
4271     assert((D.getDirectiveKind() == OMPD_task ||
4272             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4273             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4274            "Expected taskloop, task or target directive");
4275     if (SavedKmpTaskTQTy.isNull()) {
4276       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4277           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4278     }
4279     KmpTaskTQTy = SavedKmpTaskTQTy;
4280   }
4281   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4282   // Build particular struct kmp_task_t for the given task.
4283   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4284       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4285   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4286   QualType KmpTaskTWithPrivatesPtrQTy =
4287       C.getPointerType(KmpTaskTWithPrivatesQTy);
4288   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4289   llvm::Type *KmpTaskTWithPrivatesPtrTy =
4290       KmpTaskTWithPrivatesTy->getPointerTo();
4291   llvm::Value *KmpTaskTWithPrivatesTySize =
4292       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4293   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4294 
4295   // Emit initial values for private copies (if any).
4296   llvm::Value *TaskPrivatesMap = nullptr;
4297   llvm::Type *TaskPrivatesMapTy =
4298       std::next(TaskFunction->arg_begin(), 3)->getType();
4299   if (!Privates.empty()) {
4300     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4301     TaskPrivatesMap =
4302         emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
4303     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4304         TaskPrivatesMap, TaskPrivatesMapTy);
4305   } else {
4306     TaskPrivatesMap = llvm::ConstantPointerNull::get(
4307         cast<llvm::PointerType>(TaskPrivatesMapTy));
4308   }
4309   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4310   // kmp_task_t *tt);
4311   llvm::Function *TaskEntry = emitProxyTaskFunction(
4312       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4313       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4314       TaskPrivatesMap);
4315 
4316   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4317   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4318   // kmp_routine_entry_t *task_entry);
4319   // Task flags. Format is taken from
4320   // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
4321   // description of kmp_tasking_flags struct.
4322   enum {
4323     TiedFlag = 0x1,
4324     FinalFlag = 0x2,
4325     DestructorsFlag = 0x8,
4326     PriorityFlag = 0x20,
4327     DetachableFlag = 0x40,
4328   };
4329   unsigned Flags = Data.Tied ? TiedFlag : 0;
4330   bool NeedsCleanup = false;
4331   if (!Privates.empty()) {
4332     NeedsCleanup =
4333         checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
4334     if (NeedsCleanup)
4335       Flags = Flags | DestructorsFlag;
4336   }
4337   if (Data.Priority.getInt())
4338     Flags = Flags | PriorityFlag;
4339   if (D.hasClausesOfKind<OMPDetachClause>())
4340     Flags = Flags | DetachableFlag;
4341   llvm::Value *TaskFlags =
4342       Data.Final.getPointer()
4343           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4344                                      CGF.Builder.getInt32(FinalFlag),
4345                                      CGF.Builder.getInt32(/*C=*/0))
4346           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4347   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4348   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4349   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4350       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4351       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4352           TaskEntry, KmpRoutineEntryPtrTy)};
4353   llvm::Value *NewTask;
4354   if (D.hasClausesOfKind<OMPNowaitClause>()) {
4355     // Check if we have any device clause associated with the directive.
4356     const Expr *Device = nullptr;
4357     if (auto *C = D.getSingleClause<OMPDeviceClause>())
4358       Device = C->getDevice();
4359     // Emit device ID if any otherwise use default value.
4360     llvm::Value *DeviceID;
4361     if (Device)
4362       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4363                                            CGF.Int64Ty, /*isSigned=*/true);
4364     else
4365       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4366     AllocArgs.push_back(DeviceID);
4367     NewTask = CGF.EmitRuntimeCall(
4368         OMPBuilder.getOrCreateRuntimeFunction(
4369             CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4370         AllocArgs);
4371   } else {
4372     NewTask =
4373         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4374                                 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4375                             AllocArgs);
4376   }
4377   // Emit detach clause initialization.
4378   // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
4379   // task_descriptor);
4380   if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
4381     const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
4382     LValue EvtLVal = CGF.EmitLValue(Evt);
4383 
4384     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
4385     // int gtid, kmp_task_t *task);
4386     llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
4387     llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
4388     Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
4389     llvm::Value *EvtVal = CGF.EmitRuntimeCall(
4390         OMPBuilder.getOrCreateRuntimeFunction(
4391             CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
4392         {Loc, Tid, NewTask});
4393     EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
4394                                       Evt->getExprLoc());
4395     CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
4396   }
4397   // Process affinity clauses.
4398   if (D.hasClausesOfKind<OMPAffinityClause>()) {
4399     // Process list of affinity data.
4400     ASTContext &C = CGM.getContext();
4401     Address AffinitiesArray = Address::invalid();
4402     // Calculate number of elements to form the array of affinity data.
4403     llvm::Value *NumOfElements = nullptr;
4404     unsigned NumAffinities = 0;
4405     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4406       if (const Expr *Modifier = C->getModifier()) {
4407         const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4408         for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4409           llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4410           Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4411           NumOfElements =
4412               NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4413         }
4414       } else {
4415         NumAffinities += C->varlist_size();
4416       }
4417     }
4418     getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4419     // Fields ids in kmp_task_affinity_info record.
4420     enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4421 
4422     QualType KmpTaskAffinityInfoArrayTy;
4423     if (NumOfElements) {
4424       NumOfElements = CGF.Builder.CreateNUWAdd(
4425           llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4426       auto *OVE = new (C) OpaqueValueExpr(
4427           Loc,
4428           C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4429           VK_PRValue);
4430       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4431                                                     RValue::get(NumOfElements));
4432       KmpTaskAffinityInfoArrayTy =
4433           C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
4434                                  /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4435       // Properly emit variable-sized array.
4436       auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4437                                            ImplicitParamDecl::Other);
4438       CGF.EmitVarDecl(*PD);
4439       AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4440       NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4441                                                 /*isSigned=*/false);
4442     } else {
4443       KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4444           KmpTaskAffinityInfoTy,
4445           llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4446           ArrayType::Normal, /*IndexTypeQuals=*/0);
4447       AffinitiesArray =
4448           CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4449       AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4450       NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4451                                              /*isSigned=*/false);
4452     }
4453 
4454     const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4455     // Fill array by elements without iterators.
4456     unsigned Pos = 0;
4457     bool HasIterator = false;
4458     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4459       if (C->getModifier()) {
4460         HasIterator = true;
4461         continue;
4462       }
4463       for (const Expr *E : C->varlists()) {
4464         llvm::Value *Addr;
4465         llvm::Value *Size;
4466         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4467         LValue Base =
4468             CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4469                                KmpTaskAffinityInfoTy);
4470         // affs[i].base_addr = &<Affinities[i].second>;
4471         LValue BaseAddrLVal = CGF.EmitLValueForField(
4472             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4473         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4474                               BaseAddrLVal);
4475         // affs[i].len = sizeof(<Affinities[i].second>);
4476         LValue LenLVal = CGF.EmitLValueForField(
4477             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4478         CGF.EmitStoreOfScalar(Size, LenLVal);
4479         ++Pos;
4480       }
4481     }
4482     LValue PosLVal;
4483     if (HasIterator) {
4484       PosLVal = CGF.MakeAddrLValue(
4485           CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4486           C.getSizeType());
4487       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4488     }
4489     // Process elements with iterators.
4490     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4491       const Expr *Modifier = C->getModifier();
4492       if (!Modifier)
4493         continue;
4494       OMPIteratorGeneratorScope IteratorScope(
4495           CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4496       for (const Expr *E : C->varlists()) {
4497         llvm::Value *Addr;
4498         llvm::Value *Size;
4499         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4500         llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4501         LValue Base = CGF.MakeAddrLValue(
4502             CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
4503         // affs[i].base_addr = &<Affinities[i].second>;
4504         LValue BaseAddrLVal = CGF.EmitLValueForField(
4505             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4506         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4507                               BaseAddrLVal);
4508         // affs[i].len = sizeof(<Affinities[i].second>);
4509         LValue LenLVal = CGF.EmitLValueForField(
4510             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4511         CGF.EmitStoreOfScalar(Size, LenLVal);
4512         Idx = CGF.Builder.CreateNUWAdd(
4513             Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4514         CGF.EmitStoreOfScalar(Idx, PosLVal);
4515       }
4516     }
4517     // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4518     // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4519     // naffins, kmp_task_affinity_info_t *affin_list);
4520     llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4521     llvm::Value *GTid = getThreadID(CGF, Loc);
4522     llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4523         AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4524     // FIXME: Emit the function and ignore its result for now unless the
4525     // runtime function is properly implemented.
4526     (void)CGF.EmitRuntimeCall(
4527         OMPBuilder.getOrCreateRuntimeFunction(
4528             CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4529         {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4530   }
4531   llvm::Value *NewTaskNewTaskTTy =
4532       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4533           NewTask, KmpTaskTWithPrivatesPtrTy);
4534   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4535                                                KmpTaskTWithPrivatesQTy);
4536   LValue TDBase =
4537       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4538   // Fill the data in the resulting kmp_task_t record.
4539   // Copy shareds if there are any.
4540   Address KmpTaskSharedsPtr = Address::invalid();
4541   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4542     KmpTaskSharedsPtr = Address::deprecated(
4543         CGF.EmitLoadOfScalar(
4544             CGF.EmitLValueForField(
4545                 TDBase,
4546                 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
4547             Loc),
4548         CGM.getNaturalTypeAlignment(SharedsTy));
4549     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4550     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4551     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4552   }
4553   // Emit initial values for private copies (if any).
4554   TaskResultTy Result;
4555   if (!Privates.empty()) {
4556     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4557                      SharedsTy, SharedsPtrTy, Data, Privates,
4558                      /*ForDup=*/false);
4559     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4560         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4561       Result.TaskDupFn = emitTaskDupFunction(
4562           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4563           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4564           /*WithLastIter=*/!Data.LastprivateVars.empty());
4565     }
4566   }
4567   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4568   enum { Priority = 0, Destructors = 1 };
4569   // Provide pointer to function with destructors for privates.
4570   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4571   const RecordDecl *KmpCmplrdataUD =
4572       (*FI)->getType()->getAsUnionType()->getDecl();
4573   if (NeedsCleanup) {
4574     llvm::Value *DestructorFn = emitDestructorsFunction(
4575         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4576         KmpTaskTWithPrivatesQTy);
4577     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4578     LValue DestructorsLV = CGF.EmitLValueForField(
4579         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4580     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4581                               DestructorFn, KmpRoutineEntryPtrTy),
4582                           DestructorsLV);
4583   }
4584   // Set priority.
4585   if (Data.Priority.getInt()) {
4586     LValue Data2LV = CGF.EmitLValueForField(
4587         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4588     LValue PriorityLV = CGF.EmitLValueForField(
4589         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4590     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4591   }
4592   Result.NewTask = NewTask;
4593   Result.TaskEntry = TaskEntry;
4594   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4595   Result.TDBase = TDBase;
4596   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4597   return Result;
4598 }
4599 
namespace {
/// Dependence kinds as encoded for the runtime library.
enum RTLDependenceKindTy {
  /// Used for 'in' dependences.
  DepIn = 0x1,
  /// Used for both 'out' and 'inout' dependences.
  DepInOut = 0x3,
  /// Used for 'mutexinoutset' dependences.
  DepMutexInOutSet = 0x4,
  /// Used for 'inoutset' dependences.
  DepInOutSet = 0x8
};
/// Indices of the fields in the kmp_depend_info record, in declaration order.
enum RTLDependInfoFieldsTy {
  BaseAddr,
  Len,
  Flags
};
} // namespace
4611 
4612 /// Translates internal dependency kind into the runtime kind.
4613 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4614   RTLDependenceKindTy DepKind;
4615   switch (K) {
4616   case OMPC_DEPEND_in:
4617     DepKind = DepIn;
4618     break;
4619   // Out and InOut dependencies must use the same code.
4620   case OMPC_DEPEND_out:
4621   case OMPC_DEPEND_inout:
4622     DepKind = DepInOut;
4623     break;
4624   case OMPC_DEPEND_mutexinoutset:
4625     DepKind = DepMutexInOutSet;
4626     break;
4627   case OMPC_DEPEND_inoutset:
4628     DepKind = DepInOutSet;
4629     break;
4630   case OMPC_DEPEND_source:
4631   case OMPC_DEPEND_sink:
4632   case OMPC_DEPEND_depobj:
4633   case OMPC_DEPEND_unknown:
4634     llvm_unreachable("Unknown task dependence type");
4635   }
4636   return DepKind;
4637 }
4638 
4639 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4640 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4641                            QualType &FlagsTy) {
4642   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4643   if (KmpDependInfoTy.isNull()) {
4644     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4645     KmpDependInfoRD->startDefinition();
4646     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4647     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4648     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4649     KmpDependInfoRD->completeDefinition();
4650     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4651   }
4652 }
4653 
4654 std::pair<llvm::Value *, LValue>
4655 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4656                                    SourceLocation Loc) {
4657   ASTContext &C = CGM.getContext();
4658   QualType FlagsTy;
4659   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4660   RecordDecl *KmpDependInfoRD =
4661       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4662   LValue Base = CGF.EmitLoadOfPointerLValue(
4663       DepobjLVal.getAddress(CGF),
4664       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4665   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4666   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4667           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4668   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4669                             Base.getTBAAInfo());
4670   Address DepObjAddr = CGF.Builder.CreateGEP(
4671       Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4672   LValue NumDepsBase = CGF.MakeAddrLValue(
4673       DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4674   // NumDeps = deps[i].base_addr;
4675   LValue BaseAddrLVal = CGF.EmitLValueForField(
4676       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4677   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4678   return std::make_pair(NumDeps, Base);
4679 }
4680 
4681 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4682                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4683                            const OMPTaskDataTy::DependData &Data,
4684                            Address DependenciesArray) {
4685   CodeGenModule &CGM = CGF.CGM;
4686   ASTContext &C = CGM.getContext();
4687   QualType FlagsTy;
4688   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4689   RecordDecl *KmpDependInfoRD =
4690       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4691   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4692 
4693   OMPIteratorGeneratorScope IteratorScope(
4694       CGF, cast_or_null<OMPIteratorExpr>(
4695                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4696                                  : nullptr));
4697   for (const Expr *E : Data.DepExprs) {
4698     llvm::Value *Addr;
4699     llvm::Value *Size;
4700     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4701     LValue Base;
4702     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4703       Base = CGF.MakeAddrLValue(
4704           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4705     } else {
4706       LValue &PosLVal = *Pos.get<LValue *>();
4707       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4708       Base = CGF.MakeAddrLValue(
4709           CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
4710     }
4711     // deps[i].base_addr = &<Dependencies[i].second>;
4712     LValue BaseAddrLVal = CGF.EmitLValueForField(
4713         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4714     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4715                           BaseAddrLVal);
4716     // deps[i].len = sizeof(<Dependencies[i].second>);
4717     LValue LenLVal = CGF.EmitLValueForField(
4718         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4719     CGF.EmitStoreOfScalar(Size, LenLVal);
4720     // deps[i].flags = <Dependencies[i].first>;
4721     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4722     LValue FlagsLVal = CGF.EmitLValueForField(
4723         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4724     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4725                           FlagsLVal);
4726     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4727       ++(*P);
4728     } else {
4729       LValue &PosLVal = *Pos.get<LValue *>();
4730       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4731       Idx = CGF.Builder.CreateNUWAdd(Idx,
4732                                      llvm::ConstantInt::get(Idx->getType(), 1));
4733       CGF.EmitStoreOfScalar(Idx, PosLVal);
4734     }
4735   }
4736 }
4737 
4738 static SmallVector<llvm::Value *, 4>
4739 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4740                         const OMPTaskDataTy::DependData &Data) {
4741   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4742          "Expected depobj dependecy kind.");
4743   SmallVector<llvm::Value *, 4> Sizes;
4744   SmallVector<LValue, 4> SizeLVals;
4745   ASTContext &C = CGF.getContext();
4746   QualType FlagsTy;
4747   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4748   RecordDecl *KmpDependInfoRD =
4749       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4750   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4751   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4752   {
4753     OMPIteratorGeneratorScope IteratorScope(
4754         CGF, cast_or_null<OMPIteratorExpr>(
4755                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4756                                    : nullptr));
4757     for (const Expr *E : Data.DepExprs) {
4758       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4759       LValue Base = CGF.EmitLoadOfPointerLValue(
4760           DepobjLVal.getAddress(CGF),
4761           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4762       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4763           Base.getAddress(CGF), KmpDependInfoPtrT);
4764       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4765                                 Base.getTBAAInfo());
4766       Address DepObjAddr = CGF.Builder.CreateGEP(
4767           Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4768       LValue NumDepsBase = CGF.MakeAddrLValue(
4769           DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4770       // NumDeps = deps[i].base_addr;
4771       LValue BaseAddrLVal = CGF.EmitLValueForField(
4772           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4773       llvm::Value *NumDeps =
4774           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4775       LValue NumLVal = CGF.MakeAddrLValue(
4776           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4777           C.getUIntPtrType());
4778       CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4779                               NumLVal.getAddress(CGF));
4780       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4781       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4782       CGF.EmitStoreOfScalar(Add, NumLVal);
4783       SizeLVals.push_back(NumLVal);
4784     }
4785   }
4786   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4787     llvm::Value *Size =
4788         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4789     Sizes.push_back(Size);
4790   }
4791   return Sizes;
4792 }
4793 
4794 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4795                                LValue PosLVal,
4796                                const OMPTaskDataTy::DependData &Data,
4797                                Address DependenciesArray) {
4798   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4799          "Expected depobj dependecy kind.");
4800   ASTContext &C = CGF.getContext();
4801   QualType FlagsTy;
4802   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4803   RecordDecl *KmpDependInfoRD =
4804       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4805   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4806   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4807   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4808   {
4809     OMPIteratorGeneratorScope IteratorScope(
4810         CGF, cast_or_null<OMPIteratorExpr>(
4811                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4812                                    : nullptr));
4813     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4814       const Expr *E = Data.DepExprs[I];
4815       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4816       LValue Base = CGF.EmitLoadOfPointerLValue(
4817           DepobjLVal.getAddress(CGF),
4818           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4819       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4820           Base.getAddress(CGF), KmpDependInfoPtrT);
4821       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4822                                 Base.getTBAAInfo());
4823 
4824       // Get number of elements in a single depobj.
4825       Address DepObjAddr = CGF.Builder.CreateGEP(
4826           Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4827       LValue NumDepsBase = CGF.MakeAddrLValue(
4828           DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4829       // NumDeps = deps[i].base_addr;
4830       LValue BaseAddrLVal = CGF.EmitLValueForField(
4831           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4832       llvm::Value *NumDeps =
4833           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4834 
4835       // memcopy dependency data.
4836       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4837           ElSize,
4838           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4839       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4840       Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
4841       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4842 
4843       // Increase pos.
4844       // pos += size;
4845       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4846       CGF.EmitStoreOfScalar(Add, PosLVal);
4847     }
4848   }
4849 }
4850 
4851 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4852     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4853     SourceLocation Loc) {
4854   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4855         return D.DepExprs.empty();
4856       }))
4857     return std::make_pair(nullptr, Address::invalid());
4858   // Process list of dependencies.
4859   ASTContext &C = CGM.getContext();
4860   Address DependenciesArray = Address::invalid();
4861   llvm::Value *NumOfElements = nullptr;
4862   unsigned NumDependencies = std::accumulate(
4863       Dependencies.begin(), Dependencies.end(), 0,
4864       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4865         return D.DepKind == OMPC_DEPEND_depobj
4866                    ? V
4867                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4868       });
4869   QualType FlagsTy;
4870   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4871   bool HasDepobjDeps = false;
4872   bool HasRegularWithIterators = false;
4873   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4874   llvm::Value *NumOfRegularWithIterators =
4875       llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4876   // Calculate number of depobj dependecies and regular deps with the iterators.
4877   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4878     if (D.DepKind == OMPC_DEPEND_depobj) {
4879       SmallVector<llvm::Value *, 4> Sizes =
4880           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4881       for (llvm::Value *Size : Sizes) {
4882         NumOfDepobjElements =
4883             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4884       }
4885       HasDepobjDeps = true;
4886       continue;
4887     }
4888     // Include number of iterations, if any.
4889 
4890     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4891       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4892         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4893         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4894         llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4895             Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4896         NumOfRegularWithIterators =
4897             CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4898       }
4899       HasRegularWithIterators = true;
4900       continue;
4901     }
4902   }
4903 
4904   QualType KmpDependInfoArrayTy;
4905   if (HasDepobjDeps || HasRegularWithIterators) {
4906     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4907                                            /*isSigned=*/false);
4908     if (HasDepobjDeps) {
4909       NumOfElements =
4910           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4911     }
4912     if (HasRegularWithIterators) {
4913       NumOfElements =
4914           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4915     }
4916     auto *OVE = new (C) OpaqueValueExpr(
4917         Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4918         VK_PRValue);
4919     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4920                                                   RValue::get(NumOfElements));
4921     KmpDependInfoArrayTy =
4922         C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4923                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4924     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4925     // Properly emit variable-sized array.
4926     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4927                                          ImplicitParamDecl::Other);
4928     CGF.EmitVarDecl(*PD);
4929     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4930     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4931                                               /*isSigned=*/false);
4932   } else {
4933     KmpDependInfoArrayTy = C.getConstantArrayType(
4934         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4935         ArrayType::Normal, /*IndexTypeQuals=*/0);
4936     DependenciesArray =
4937         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4938     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4939     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4940                                            /*isSigned=*/false);
4941   }
4942   unsigned Pos = 0;
4943   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4944     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4945         Dependencies[I].IteratorExpr)
4946       continue;
4947     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4948                    DependenciesArray);
4949   }
4950   // Copy regular dependecies with iterators.
4951   LValue PosLVal = CGF.MakeAddrLValue(
4952       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4953   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4954   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4955     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4956         !Dependencies[I].IteratorExpr)
4957       continue;
4958     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4959                    DependenciesArray);
4960   }
4961   // Copy final depobj arrays without iterators.
4962   if (HasDepobjDeps) {
4963     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4964       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4965         continue;
4966       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4967                          DependenciesArray);
4968     }
4969   }
4970   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4971       DependenciesArray, CGF.VoidPtrTy);
4972   return std::make_pair(NumOfElements, DependenciesArray);
4973 }
4974 
4975 Address CGOpenMPRuntime::emitDepobjDependClause(
4976     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4977     SourceLocation Loc) {
4978   if (Dependencies.DepExprs.empty())
4979     return Address::invalid();
4980   // Process list of dependencies.
4981   ASTContext &C = CGM.getContext();
4982   Address DependenciesArray = Address::invalid();
4983   unsigned NumDependencies = Dependencies.DepExprs.size();
4984   QualType FlagsTy;
4985   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4986   RecordDecl *KmpDependInfoRD =
4987       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4988 
4989   llvm::Value *Size;
4990   // Define type kmp_depend_info[<Dependencies.size()>];
4991   // For depobj reserve one extra element to store the number of elements.
4992   // It is required to handle depobj(x) update(in) construct.
4993   // kmp_depend_info[<Dependencies.size()>] deps;
4994   llvm::Value *NumDepsVal;
4995   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4996   if (const auto *IE =
4997           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4998     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4999     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
5000       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
5001       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
5002       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
5003     }
5004     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
5005                                     NumDepsVal);
5006     CharUnits SizeInBytes =
5007         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
5008     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
5009     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
5010     NumDepsVal =
5011         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
5012   } else {
5013     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5014         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
5015         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5016     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
5017     Size = CGM.getSize(Sz.alignTo(Align));
5018     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
5019   }
5020   // Need to allocate on the dynamic memory.
5021   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5022   // Use default allocator.
5023   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5024   llvm::Value *Args[] = {ThreadID, Size, Allocator};
5025 
5026   llvm::Value *Addr =
5027       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5028                               CGM.getModule(), OMPRTL___kmpc_alloc),
5029                           Args, ".dep.arr.addr");
5030   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5031       Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
5032   DependenciesArray = Address::deprecated(Addr, Align);
5033   // Write number of elements in the first element of array for depobj.
5034   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
5035   // deps[i].base_addr = NumDependencies;
5036   LValue BaseAddrLVal = CGF.EmitLValueForField(
5037       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5038   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
5039   llvm::PointerUnion<unsigned *, LValue *> Pos;
5040   unsigned Idx = 1;
5041   LValue PosLVal;
5042   if (Dependencies.IteratorExpr) {
5043     PosLVal = CGF.MakeAddrLValue(
5044         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
5045         C.getSizeType());
5046     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
5047                           /*IsInit=*/true);
5048     Pos = &PosLVal;
5049   } else {
5050     Pos = &Idx;
5051   }
5052   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
5053   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5054       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
5055   return DependenciesArray;
5056 }
5057 
5058 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5059                                         SourceLocation Loc) {
5060   ASTContext &C = CGM.getContext();
5061   QualType FlagsTy;
5062   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5063   LValue Base = CGF.EmitLoadOfPointerLValue(
5064       DepobjLVal.getAddress(CGF),
5065       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5066   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5067   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5068       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5069   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5070       Addr.getElementType(), Addr.getPointer(),
5071       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5072   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5073                                                                CGF.VoidPtrTy);
5074   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5075   // Use default allocator.
5076   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5077   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5078 
5079   // _kmpc_free(gtid, addr, nullptr);
5080   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5081                                 CGM.getModule(), OMPRTL___kmpc_free),
5082                             Args);
5083 }
5084 
5085 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5086                                        OpenMPDependClauseKind NewDepKind,
5087                                        SourceLocation Loc) {
5088   ASTContext &C = CGM.getContext();
5089   QualType FlagsTy;
5090   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5091   RecordDecl *KmpDependInfoRD =
5092       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5093   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5094   llvm::Value *NumDeps;
5095   LValue Base;
5096   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5097 
5098   Address Begin = Base.getAddress(CGF);
5099   // Cast from pointer to array type to pointer to single element.
5100   llvm::Value *End = CGF.Builder.CreateGEP(
5101       Begin.getElementType(), Begin.getPointer(), NumDeps);
5102   // The basic structure here is a while-do loop.
5103   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5104   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5105   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5106   CGF.EmitBlock(BodyBB);
5107   llvm::PHINode *ElementPHI =
5108       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5109   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5110   Begin = Begin.withPointer(ElementPHI);
5111   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5112                             Base.getTBAAInfo());
5113   // deps[i].flags = NewDepKind;
5114   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5115   LValue FlagsLVal = CGF.EmitLValueForField(
5116       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5117   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5118                         FlagsLVal);
5119 
5120   // Shift the address forward by one element.
5121   Address ElementNext =
5122       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5123   ElementPHI->addIncoming(ElementNext.getPointer(),
5124                           CGF.Builder.GetInsertBlock());
5125   llvm::Value *IsEmpty =
5126       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5127   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5128   // Done.
5129   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5130 }
5131 
5132 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5133                                    const OMPExecutableDirective &D,
5134                                    llvm::Function *TaskFunction,
5135                                    QualType SharedsTy, Address Shareds,
5136                                    const Expr *IfCond,
5137                                    const OMPTaskDataTy &Data) {
5138   if (!CGF.HaveInsertPoint())
5139     return;
5140 
5141   TaskResultTy Result =
5142       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5143   llvm::Value *NewTask = Result.NewTask;
5144   llvm::Function *TaskEntry = Result.TaskEntry;
5145   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5146   LValue TDBase = Result.TDBase;
5147   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5148   // Process list of dependences.
5149   Address DependenciesArray = Address::invalid();
5150   llvm::Value *NumOfElements;
5151   std::tie(NumOfElements, DependenciesArray) =
5152       emitDependClause(CGF, Data.Dependences, Loc);
5153 
5154   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5155   // libcall.
5156   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5157   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5158   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5159   // list is not empty
5160   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5161   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5162   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5163   llvm::Value *DepTaskArgs[7];
5164   if (!Data.Dependences.empty()) {
5165     DepTaskArgs[0] = UpLoc;
5166     DepTaskArgs[1] = ThreadID;
5167     DepTaskArgs[2] = NewTask;
5168     DepTaskArgs[3] = NumOfElements;
5169     DepTaskArgs[4] = DependenciesArray.getPointer();
5170     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5171     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5172   }
5173   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5174                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5175     if (!Data.Tied) {
5176       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5177       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5178       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5179     }
5180     if (!Data.Dependences.empty()) {
5181       CGF.EmitRuntimeCall(
5182           OMPBuilder.getOrCreateRuntimeFunction(
5183               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5184           DepTaskArgs);
5185     } else {
5186       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5187                               CGM.getModule(), OMPRTL___kmpc_omp_task),
5188                           TaskArgs);
5189     }
5190     // Check if parent region is untied and build return for untied task;
5191     if (auto *Region =
5192             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5193       Region->emitUntiedSwitch(CGF);
5194   };
5195 
5196   llvm::Value *DepWaitTaskArgs[6];
5197   if (!Data.Dependences.empty()) {
5198     DepWaitTaskArgs[0] = UpLoc;
5199     DepWaitTaskArgs[1] = ThreadID;
5200     DepWaitTaskArgs[2] = NumOfElements;
5201     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5202     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5203     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5204   }
5205   auto &M = CGM.getModule();
5206   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5207                         TaskEntry, &Data, &DepWaitTaskArgs,
5208                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5209     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5210     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5211     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5212     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5213     // is specified.
5214     if (!Data.Dependences.empty())
5215       CGF.EmitRuntimeCall(
5216           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5217           DepWaitTaskArgs);
5218     // Call proxy_task_entry(gtid, new_task);
5219     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5220                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5221       Action.Enter(CGF);
5222       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5223       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5224                                                           OutlinedFnArgs);
5225     };
5226 
5227     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5228     // kmp_task_t *new_task);
5229     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5230     // kmp_task_t *new_task);
5231     RegionCodeGenTy RCG(CodeGen);
5232     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5233                               M, OMPRTL___kmpc_omp_task_begin_if0),
5234                           TaskArgs,
5235                           OMPBuilder.getOrCreateRuntimeFunction(
5236                               M, OMPRTL___kmpc_omp_task_complete_if0),
5237                           TaskArgs);
5238     RCG.setAction(Action);
5239     RCG(CGF);
5240   };
5241 
5242   if (IfCond) {
5243     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5244   } else {
5245     RegionCodeGenTy ThenRCG(ThenCodeGen);
5246     ThenRCG(CGF);
5247   }
5248 }
5249 
5250 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5251                                        const OMPLoopDirective &D,
5252                                        llvm::Function *TaskFunction,
5253                                        QualType SharedsTy, Address Shareds,
5254                                        const Expr *IfCond,
5255                                        const OMPTaskDataTy &Data) {
5256   if (!CGF.HaveInsertPoint())
5257     return;
5258   TaskResultTy Result =
5259       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5260   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5261   // libcall.
5262   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5263   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5264   // sched, kmp_uint64 grainsize, void *task_dup);
5265   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5266   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5267   llvm::Value *IfVal;
5268   if (IfCond) {
5269     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5270                                       /*isSigned=*/true);
5271   } else {
5272     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5273   }
5274 
5275   LValue LBLVal = CGF.EmitLValueForField(
5276       Result.TDBase,
5277       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5278   const auto *LBVar =
5279       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5280   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5281                        LBLVal.getQuals(),
5282                        /*IsInitializer=*/true);
5283   LValue UBLVal = CGF.EmitLValueForField(
5284       Result.TDBase,
5285       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5286   const auto *UBVar =
5287       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5288   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5289                        UBLVal.getQuals(),
5290                        /*IsInitializer=*/true);
5291   LValue StLVal = CGF.EmitLValueForField(
5292       Result.TDBase,
5293       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5294   const auto *StVar =
5295       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5296   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5297                        StLVal.getQuals(),
5298                        /*IsInitializer=*/true);
5299   // Store reductions address.
5300   LValue RedLVal = CGF.EmitLValueForField(
5301       Result.TDBase,
5302       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5303   if (Data.Reductions) {
5304     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5305   } else {
5306     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5307                                CGF.getContext().VoidPtrTy);
5308   }
5309   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5310   llvm::Value *TaskArgs[] = {
5311       UpLoc,
5312       ThreadID,
5313       Result.NewTask,
5314       IfVal,
5315       LBLVal.getPointer(CGF),
5316       UBLVal.getPointer(CGF),
5317       CGF.EmitLoadOfScalar(StLVal, Loc),
5318       llvm::ConstantInt::getSigned(
5319           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5320       llvm::ConstantInt::getSigned(
5321           CGF.IntTy, Data.Schedule.getPointer()
5322                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5323                          : NoSchedule),
5324       Data.Schedule.getPointer()
5325           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5326                                       /*isSigned=*/false)
5327           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5328       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5329                              Result.TaskDupFn, CGF.VoidPtrTy)
5330                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5331   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5332                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5333                       TaskArgs);
5334 }
5335 
5336 /// Emit reduction operation for each element of array (required for
5337 /// array sections) LHS op = RHS.
5338 /// \param Type Type of array.
5339 /// \param LHSVar Variable on the left side of the reduction operation
5340 /// (references element of array in original variable).
5341 /// \param RHSVar Variable on the right side of the reduction operation
5342 /// (references element of array in original variable).
5343 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5344 /// RHSVar.
5345 static void EmitOMPAggregateReduction(
5346     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5347     const VarDecl *RHSVar,
5348     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5349                                   const Expr *, const Expr *)> &RedOpGen,
5350     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5351     const Expr *UpExpr = nullptr) {
5352   // Perform element-by-element initialization.
5353   QualType ElementTy;
5354   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5355   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5356 
5357   // Drill down to the base element type on both arrays.
5358   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5359   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5360 
5361   llvm::Value *RHSBegin = RHSAddr.getPointer();
5362   llvm::Value *LHSBegin = LHSAddr.getPointer();
5363   // Cast from pointer to array type to pointer to single element.
5364   llvm::Value *LHSEnd =
5365       CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
5366   // The basic structure here is a while-do loop.
5367   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5368   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5369   llvm::Value *IsEmpty =
5370       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5371   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5372 
5373   // Enter the loop body, making that address the current address.
5374   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5375   CGF.EmitBlock(BodyBB);
5376 
5377   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5378 
5379   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5380       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5381   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5382   Address RHSElementCurrent = Address::deprecated(
5383       RHSElementPHI,
5384       RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5385 
5386   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5387       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5388   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5389   Address LHSElementCurrent = Address::deprecated(
5390       LHSElementPHI,
5391       LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5392 
5393   // Emit copy.
5394   CodeGenFunction::OMPPrivateScope Scope(CGF);
5395   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5396   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5397   Scope.Privatize();
5398   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5399   Scope.ForceCleanup();
5400 
5401   // Shift the address forward by one element.
5402   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5403       LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
5404       "omp.arraycpy.dest.element");
5405   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5406       RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
5407       "omp.arraycpy.src.element");
5408   // Check whether we've reached the end.
5409   llvm::Value *Done =
5410       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5411   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5412   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5413   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5414 
5415   // Done.
5416   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5417 }
5418 
5419 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5420 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5421 /// UDR combiner function.
5422 static void emitReductionCombiner(CodeGenFunction &CGF,
5423                                   const Expr *ReductionOp) {
5424   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5425     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5426       if (const auto *DRE =
5427               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5428         if (const auto *DRD =
5429                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5430           std::pair<llvm::Function *, llvm::Function *> Reduction =
5431               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5432           RValue Func = RValue::get(Reduction.first);
5433           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5434           CGF.EmitIgnoredExpr(ReductionOp);
5435           return;
5436         }
5437   CGF.EmitIgnoredExpr(ReductionOp);
5438 }
5439 
5440 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5441     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5442     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5443     ArrayRef<const Expr *> ReductionOps) {
5444   ASTContext &C = CGM.getContext();
5445 
5446   // void reduction_func(void *LHSArg, void *RHSArg);
5447   FunctionArgList Args;
5448   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5449                            ImplicitParamDecl::Other);
5450   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5451                            ImplicitParamDecl::Other);
5452   Args.push_back(&LHSArg);
5453   Args.push_back(&RHSArg);
5454   const auto &CGFI =
5455       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5456   std::string Name = getName({"omp", "reduction", "reduction_func"});
5457   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5458                                     llvm::GlobalValue::InternalLinkage, Name,
5459                                     &CGM.getModule());
5460   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5461   Fn->setDoesNotRecurse();
5462   CodeGenFunction CGF(CGM);
5463   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5464 
5465   // Dst = (void*[n])(LHSArg);
5466   // Src = (void*[n])(RHSArg);
5467   Address LHS = Address::deprecated(
5468       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5469           CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType),
5470       CGF.getPointerAlign());
5471   Address RHS = Address::deprecated(
5472       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5473           CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType),
5474       CGF.getPointerAlign());
5475 
5476   //  ...
5477   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5478   //  ...
5479   CodeGenFunction::OMPPrivateScope Scope(CGF);
5480   const auto *IPriv = Privates.begin();
5481   unsigned Idx = 0;
5482   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5483     const auto *RHSVar =
5484         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5485     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5486       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5487     });
5488     const auto *LHSVar =
5489         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5490     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5491       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5492     });
5493     QualType PrivTy = (*IPriv)->getType();
5494     if (PrivTy->isVariablyModifiedType()) {
5495       // Get array size and emit VLA type.
5496       ++Idx;
5497       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5498       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5499       const VariableArrayType *VLA =
5500           CGF.getContext().getAsVariableArrayType(PrivTy);
5501       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5502       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5503           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5504       CGF.EmitVariablyModifiedType(PrivTy);
5505     }
5506   }
5507   Scope.Privatize();
5508   IPriv = Privates.begin();
5509   const auto *ILHS = LHSExprs.begin();
5510   const auto *IRHS = RHSExprs.begin();
5511   for (const Expr *E : ReductionOps) {
5512     if ((*IPriv)->getType()->isArrayType()) {
5513       // Emit reduction for array section.
5514       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5515       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5516       EmitOMPAggregateReduction(
5517           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5518           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5519             emitReductionCombiner(CGF, E);
5520           });
5521     } else {
5522       // Emit reduction for array subscript or single variable.
5523       emitReductionCombiner(CGF, E);
5524     }
5525     ++IPriv;
5526     ++ILHS;
5527     ++IRHS;
5528   }
5529   Scope.ForceCleanup();
5530   CGF.FinishFunction();
5531   return Fn;
5532 }
5533 
5534 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5535                                                   const Expr *ReductionOp,
5536                                                   const Expr *PrivateRef,
5537                                                   const DeclRefExpr *LHS,
5538                                                   const DeclRefExpr *RHS) {
5539   if (PrivateRef->getType()->isArrayType()) {
5540     // Emit reduction for array section.
5541     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5542     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5543     EmitOMPAggregateReduction(
5544         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5545         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5546           emitReductionCombiner(CGF, ReductionOp);
5547         });
5548   } else {
5549     // Emit reduction for array subscript or single variable.
5550     emitReductionCombiner(CGF, ReductionOp);
5551   }
5552 }
5553 
// Emits the code that combines the private reduction copies into the original
// reduction items. Privates, LHSExprs, RHSExprs and ReductionOps are parallel
// arrays with one entry per reduction item. Options.SimpleReduction selects
// the plain inline form; otherwise a __kmpc_reduce{_nowait} call followed by a
// switch on its result is emitted (see the scheme below).
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  // Nothing can be emitted without an insertion point.
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Emit every combiner inline and return; no runtime calls are generated
    // on this path.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // The list gets one slot per reduction item plus one extra slot for every
  // variably modified item.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  // Idx is the slot in ReductionList; it runs ahead of I whenever a size slot
  // has been written.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The element count is stored in the next slot, encoded as a pointer.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      // Note: this local Size shadows the slot counter declared above.
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  // <n> is the number of reduction items (RHSExprs.size()), not the number of
  // slots in RedList.
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Region body for case 1: emit the combiner of every reduction item.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The action carries no enter callee (nullptr / llvm::None) and the matching
  // __kmpc_end_reduce{_nowait} call with EndArgs.
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Region body for case 2: each reduction op is emitted either as an atomic
  // update (when it is an assignment) or inside a critical region.
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // XExpr/UpExpr are the two sides of an assignment 'x = <update>', EExpr
      // is the right operand of the binary operator found in the update.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // BO_Comma is only the initial value; it is replaced below when a
      // binary operator is found in the update expression.
      // NOTE(review): presumably EmitOMPAtomicSimpleUpdateExpr treats
      // BO_Comma as "no simple atomic form" - confirm against its definition.
      BinaryOperatorKind BO = BO_Comma;
      // Note: the pointer named BO declared in this condition shadows the
      // opcode variable BO above within the if statement.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        // The reduction op is an assignment: emit it as an atomic update.
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              // Callback: store the supplied value of X into a temporary,
              // remap VD (the LHS helper variable) to that temporary and
              // evaluate the update expression on it.
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        // The region is named via getName({"atomic_reduction"}); the three
        // Expr parameters are unused and exist only to match the generator
        // signature expected by EmitOMPAggregateReduction.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    // This EndArgs shadows the identical array built for case 1. The region
    // is invoked inside this branch while the local Action is still in scope.
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    // With nowait no end call is attached to the atomic region.
    AtomicRCG(CGF);
  }

  // Both cases branch to the default block, where emission continues.
  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5860 
5861 /// Generates unique name for artificial threadprivate variables.
5862 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5863 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5864                                       const Expr *Ref) {
5865   SmallString<256> Buffer;
5866   llvm::raw_svector_ostream Out(Buffer);
5867   const clang::DeclRefExpr *DE;
5868   const VarDecl *D = ::getBaseDecl(Ref, DE);
5869   if (!D)
5870     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5871   D = D->getCanonicalDecl();
5872   std::string Name = CGM.getOpenMPRuntime().getName(
5873       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5874   Out << Prefix << Name << "_"
5875       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5876   return std::string(Out.str());
5877 }
5878 
5879 /// Emits reduction initializer function:
5880 /// \code
5881 /// void @.red_init(void* %arg, void* %orig) {
5882 /// %0 = bitcast void* %arg to <type>*
5883 /// store <type> <init>, <type>* %0
5884 /// ret void
5885 /// }
5886 /// \endcode
5887 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5888                                            SourceLocation Loc,
5889                                            ReductionCodeGen &RCG, unsigned N) {
5890   ASTContext &C = CGM.getContext();
5891   QualType VoidPtrTy = C.VoidPtrTy;
5892   VoidPtrTy.addRestrict();
5893   FunctionArgList Args;
5894   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5895                           ImplicitParamDecl::Other);
5896   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5897                               ImplicitParamDecl::Other);
5898   Args.emplace_back(&Param);
5899   Args.emplace_back(&ParamOrig);
5900   const auto &FnInfo =
5901       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5902   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5903   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5904   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5905                                     Name, &CGM.getModule());
5906   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5907   Fn->setDoesNotRecurse();
5908   CodeGenFunction CGF(CGM);
5909   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5910   Address PrivateAddr = CGF.EmitLoadOfPointer(
5911       CGF.GetAddrOfLocalVar(&Param),
5912       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5913   llvm::Value *Size = nullptr;
5914   // If the size of the reduction item is non-constant, load it from global
5915   // threadprivate variable.
5916   if (RCG.getSizes(N).second) {
5917     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5918         CGF, CGM.getContext().getSizeType(),
5919         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5920     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5921                                 CGM.getContext().getSizeType(), Loc);
5922   }
5923   RCG.emitAggregateType(CGF, N, Size);
5924   Address OrigAddr = Address::invalid();
5925   // If initializer uses initializer from declare reduction construct, emit a
5926   // pointer to the address of the original reduction item (reuired by reduction
5927   // initializer)
5928   if (RCG.usesReductionInitializer(N)) {
5929     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5930     OrigAddr = CGF.EmitLoadOfPointer(
5931         SharedAddr,
5932         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5933   }
5934   // Emit the initializer:
5935   // %0 = bitcast void* %arg to <type>*
5936   // store <type> <init>, <type>* %0
5937   RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5938                          [](CodeGenFunction &) { return false; });
5939   CGF.FinishFunction();
5940   return Fn;
5941 }
5942 
5943 /// Emits reduction combiner function:
5944 /// \code
5945 /// void @.red_comb(void* %arg0, void* %arg1) {
5946 /// %lhs = bitcast void* %arg0 to <type>*
5947 /// %rhs = bitcast void* %arg1 to <type>*
5948 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5949 /// store <type> %2, <type>* %lhs
5950 /// ret void
5951 /// }
5952 /// \endcode
5953 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5954                                            SourceLocation Loc,
5955                                            ReductionCodeGen &RCG, unsigned N,
5956                                            const Expr *ReductionOp,
5957                                            const Expr *LHS, const Expr *RHS,
5958                                            const Expr *PrivateRef) {
5959   ASTContext &C = CGM.getContext();
5960   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5961   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5962   FunctionArgList Args;
5963   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5964                                C.VoidPtrTy, ImplicitParamDecl::Other);
5965   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5966                             ImplicitParamDecl::Other);
5967   Args.emplace_back(&ParamInOut);
5968   Args.emplace_back(&ParamIn);
5969   const auto &FnInfo =
5970       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5971   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5972   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5973   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5974                                     Name, &CGM.getModule());
5975   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5976   Fn->setDoesNotRecurse();
5977   CodeGenFunction CGF(CGM);
5978   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5979   llvm::Value *Size = nullptr;
5980   // If the size of the reduction item is non-constant, load it from global
5981   // threadprivate variable.
5982   if (RCG.getSizes(N).second) {
5983     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5984         CGF, CGM.getContext().getSizeType(),
5985         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5986     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5987                                 CGM.getContext().getSizeType(), Loc);
5988   }
5989   RCG.emitAggregateType(CGF, N, Size);
5990   // Remap lhs and rhs variables to the addresses of the function arguments.
5991   // %lhs = bitcast void* %arg0 to <type>*
5992   // %rhs = bitcast void* %arg1 to <type>*
5993   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5994   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
5995     // Pull out the pointer to the variable.
5996     Address PtrAddr = CGF.EmitLoadOfPointer(
5997         CGF.GetAddrOfLocalVar(&ParamInOut),
5998         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5999     return CGF.Builder.CreateElementBitCast(
6000         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
6001   });
6002   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
6003     // Pull out the pointer to the variable.
6004     Address PtrAddr = CGF.EmitLoadOfPointer(
6005         CGF.GetAddrOfLocalVar(&ParamIn),
6006         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6007     return CGF.Builder.CreateElementBitCast(
6008         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
6009   });
6010   PrivateScope.Privatize();
6011   // Emit the combiner body:
6012   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6013   // store <type> %2, <type>* %lhs
6014   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6015       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6016       cast<DeclRefExpr>(RHS));
6017   CGF.FinishFunction();
6018   return Fn;
6019 }
6020 
6021 /// Emits reduction finalizer function:
6022 /// \code
6023 /// void @.red_fini(void* %arg) {
6024 /// %0 = bitcast void* %arg to <type>*
6025 /// <destroy>(<type>* %0)
6026 /// ret void
6027 /// }
6028 /// \endcode
6029 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6030                                            SourceLocation Loc,
6031                                            ReductionCodeGen &RCG, unsigned N) {
6032   if (!RCG.needCleanups(N))
6033     return nullptr;
6034   ASTContext &C = CGM.getContext();
6035   FunctionArgList Args;
6036   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6037                           ImplicitParamDecl::Other);
6038   Args.emplace_back(&Param);
6039   const auto &FnInfo =
6040       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6041   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6042   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6043   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6044                                     Name, &CGM.getModule());
6045   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6046   Fn->setDoesNotRecurse();
6047   CodeGenFunction CGF(CGM);
6048   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6049   Address PrivateAddr = CGF.EmitLoadOfPointer(
6050       CGF.GetAddrOfLocalVar(&Param),
6051       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6052   llvm::Value *Size = nullptr;
6053   // If the size of the reduction item is non-constant, load it from global
6054   // threadprivate variable.
6055   if (RCG.getSizes(N).second) {
6056     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6057         CGF, CGM.getContext().getSizeType(),
6058         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6059     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6060                                 CGM.getContext().getSizeType(), Loc);
6061   }
6062   RCG.emitAggregateType(CGF, N, Size);
6063   // Emit the finalizer body:
6064   // <destroy>(<type>* %0)
6065   RCG.emitCleanups(CGF, N, PrivateAddr);
6066   CGF.FinishFunction(Loc);
6067   return Fn;
6068 }
6069 
/// Emits the initialization of task reductions: fills a temporary array with
/// one kmp_taskred_input_t descriptor per entry of \p Data.ReductionVars and
/// hands it to the runtime (__kmpc_taskred_modifier_init when
/// \p Data.IsReductionWithTaskMod is set, __kmpc_taskred_init otherwise).
/// \p LHSExprs and \p RHSExprs are indexed in parallel with the reduction
/// variables and are forwarded to the combiner emission.
/// \returns the value produced by the runtime call, or nullptr when there is
/// no insert point or there are no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  // The fields are added in the order listed in the struct sketch above.
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = (size_t)SizeValInChars;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // emitReduceFiniFunction returns nullptr when no cleanups are needed; a
    // null pointer is stored in that case.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6198 
6199 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6200                                             SourceLocation Loc,
6201                                             bool IsWorksharingReduction) {
6202   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6203   // is_ws, int num, void *data);
6204   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6205   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6206                                                 CGM.IntTy, /*isSigned=*/true);
6207   llvm::Value *Args[] = {IdentTLoc, GTid,
6208                          llvm::ConstantInt::get(CGM.IntTy,
6209                                                 IsWorksharingReduction ? 1 : 0,
6210                                                 /*isSigned=*/true)};
6211   (void)CGF.EmitRuntimeCall(
6212       OMPBuilder.getOrCreateRuntimeFunction(
6213           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6214       Args);
6215 }
6216 
6217 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6218                                               SourceLocation Loc,
6219                                               ReductionCodeGen &RCG,
6220                                               unsigned N) {
6221   auto Sizes = RCG.getSizes(N);
6222   // Emit threadprivate global variable if the type is non-constant
6223   // (Sizes.second = nullptr).
6224   if (Sizes.second) {
6225     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6226                                                      /*isSigned=*/false);
6227     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6228         CGF, CGM.getContext().getSizeType(),
6229         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6230     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6231   }
6232 }
6233 
6234 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6235                                               SourceLocation Loc,
6236                                               llvm::Value *ReductionsPtr,
6237                                               LValue SharedLVal) {
6238   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6239   // *d);
6240   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6241                                                    CGM.IntTy,
6242                                                    /*isSigned=*/true),
6243                          ReductionsPtr,
6244                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6245                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6246   return Address::deprecated(
6247       CGF.EmitRuntimeCall(
6248           OMPBuilder.getOrCreateRuntimeFunction(
6249               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6250           Args),
6251       SharedLVal.getAlignment());
6252 }
6253 
6254 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
6255                                        const OMPTaskDataTy &Data) {
6256   if (!CGF.HaveInsertPoint())
6257     return;
6258 
6259   if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
6260     // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
6261     OMPBuilder.createTaskwait(CGF.Builder);
6262   } else {
6263     llvm::Value *ThreadID = getThreadID(CGF, Loc);
6264     llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
6265     auto &M = CGM.getModule();
6266     Address DependenciesArray = Address::invalid();
6267     llvm::Value *NumOfElements;
6268     std::tie(NumOfElements, DependenciesArray) =
6269         emitDependClause(CGF, Data.Dependences, Loc);
6270     llvm::Value *DepWaitTaskArgs[6];
6271     if (!Data.Dependences.empty()) {
6272       DepWaitTaskArgs[0] = UpLoc;
6273       DepWaitTaskArgs[1] = ThreadID;
6274       DepWaitTaskArgs[2] = NumOfElements;
6275       DepWaitTaskArgs[3] = DependenciesArray.getPointer();
6276       DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
6277       DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6278 
6279       CodeGenFunction::RunCleanupsScope LocalScope(CGF);
6280 
6281       // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
6282       // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
6283       // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
6284       // is specified.
6285       CGF.EmitRuntimeCall(
6286           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
6287           DepWaitTaskArgs);
6288 
6289     } else {
6290 
6291       // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6292       // global_tid);
6293       llvm::Value *Args[] = {UpLoc, ThreadID};
6294       // Ignore return result until untied tasks are supported.
6295       CGF.EmitRuntimeCall(
6296           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
6297           Args);
6298     }
6299   }
6300 
6301   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6302     Region->emitUntiedSwitch(CGF);
6303 }
6304 
6305 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6306                                            OpenMPDirectiveKind InnerKind,
6307                                            const RegionCodeGenTy &CodeGen,
6308                                            bool HasCancel) {
6309   if (!CGF.HaveInsertPoint())
6310     return;
6311   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6312                                  InnerKind != OMPD_critical &&
6313                                      InnerKind != OMPD_master &&
6314                                      InnerKind != OMPD_masked);
6315   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6316 }
6317 
namespace {
/// Cancellation kind codes. getCancellationKind() maps a directive kind to one
/// of these, and the result is passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls.
enum RTCancelKind {
  /// Initial value used by getCancellationKind() before a kind is selected
  /// (presumably "no cancellation requested" - confirm against the runtime).
  CancelNoreq = 0,
  /// Selected for OMPD_parallel.
  CancelParallel = 1,
  /// Selected for OMPD_for.
  CancelLoop = 2,
  /// Selected for OMPD_sections.
  CancelSections = 3,
  /// Selected for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
6327 
6328 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6329   RTCancelKind CancelKind = CancelNoreq;
6330   if (CancelRegion == OMPD_parallel)
6331     CancelKind = CancelParallel;
6332   else if (CancelRegion == OMPD_for)
6333     CancelKind = CancelLoop;
6334   else if (CancelRegion == OMPD_sections)
6335     CancelKind = CancelSections;
6336   else {
6337     assert(CancelRegion == OMPD_taskgroup);
6338     CancelKind = CancelTaskgroup;
6339   }
6340   return CancelKind;
6341 }
6342 
6343 void CGOpenMPRuntime::emitCancellationPointCall(
6344     CodeGenFunction &CGF, SourceLocation Loc,
6345     OpenMPDirectiveKind CancelRegion) {
6346   if (!CGF.HaveInsertPoint())
6347     return;
6348   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6349   // global_tid, kmp_int32 cncl_kind);
6350   if (auto *OMPRegionInfo =
6351           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6352     // For 'cancellation point taskgroup', the task region info may not have a
6353     // cancel. This may instead happen in another adjacent task.
6354     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6355       llvm::Value *Args[] = {
6356           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6357           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6358       // Ignore return result until untied tasks are supported.
6359       llvm::Value *Result = CGF.EmitRuntimeCall(
6360           OMPBuilder.getOrCreateRuntimeFunction(
6361               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6362           Args);
6363       // if (__kmpc_cancellationpoint()) {
6364       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6365       //   exit from construct;
6366       // }
6367       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6368       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6369       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6370       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6371       CGF.EmitBlock(ExitBB);
6372       if (CancelRegion == OMPD_parallel)
6373         emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6374       // exit from construct;
6375       CodeGenFunction::JumpDest CancelDest =
6376           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6377       CGF.EmitBranchThroughCleanup(CancelDest);
6378       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6379     }
6380   }
6381 }
6382 
6383 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6384                                      const Expr *IfCond,
6385                                      OpenMPDirectiveKind CancelRegion) {
6386   if (!CGF.HaveInsertPoint())
6387     return;
6388   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6389   // kmp_int32 cncl_kind);
6390   auto &M = CGM.getModule();
6391   if (auto *OMPRegionInfo =
6392           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6393     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6394                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6395       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6396       llvm::Value *Args[] = {
6397           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6398           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6399       // Ignore return result until untied tasks are supported.
6400       llvm::Value *Result = CGF.EmitRuntimeCall(
6401           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6402       // if (__kmpc_cancel()) {
6403       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6404       //   exit from construct;
6405       // }
6406       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6407       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6408       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6409       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6410       CGF.EmitBlock(ExitBB);
6411       if (CancelRegion == OMPD_parallel)
6412         RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6413       // exit from construct;
6414       CodeGenFunction::JumpDest CancelDest =
6415           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6416       CGF.EmitBranchThroughCleanup(CancelDest);
6417       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6418     };
6419     if (IfCond) {
6420       emitIfClause(CGF, IfCond, ThenGen,
6421                    [](CodeGenFunction &, PrePostActionTy &) {});
6422     } else {
6423       RegionCodeGenTy ThenRCG(ThenGen);
6424       ThenRCG(CGF);
6425     }
6426   }
6427 }
6428 
6429 namespace {
6430 /// Cleanup action for uses_allocators support.
6431 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6432   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6433 
6434 public:
6435   OMPUsesAllocatorsActionTy(
6436       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6437       : Allocators(Allocators) {}
6438   void Enter(CodeGenFunction &CGF) override {
6439     if (!CGF.HaveInsertPoint())
6440       return;
6441     for (const auto &AllocatorData : Allocators) {
6442       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6443           CGF, AllocatorData.first, AllocatorData.second);
6444     }
6445   }
6446   void Exit(CodeGenFunction &CGF) override {
6447     if (!CGF.HaveInsertPoint())
6448       return;
6449     for (const auto &AllocatorData : Allocators) {
6450       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6451                                                         AllocatorData.first);
6452     }
6453   }
6454 };
6455 } // namespace
6456 
6457 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6458     const OMPExecutableDirective &D, StringRef ParentName,
6459     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6460     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6461   assert(!ParentName.empty() && "Invalid target region parent name!");
6462   HasEmittedTargetRegion = true;
6463   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6464   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6465     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6466       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6467       if (!D.AllocatorTraits)
6468         continue;
6469       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6470     }
6471   }
6472   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6473   CodeGen.setAction(UsesAllocatorAction);
6474   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6475                                    IsOffloadEntry, CodeGen);
6476 }
6477 
6478 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6479                                              const Expr *Allocator,
6480                                              const Expr *AllocatorTraits) {
6481   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6482   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6483   // Use default memspace handle.
6484   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6485   llvm::Value *NumTraits = llvm::ConstantInt::get(
6486       CGF.IntTy, cast<ConstantArrayType>(
6487                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6488                      ->getSize()
6489                      .getLimitedValue());
6490   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6491   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6492       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
6493   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6494                                            AllocatorTraitsLVal.getBaseInfo(),
6495                                            AllocatorTraitsLVal.getTBAAInfo());
6496   llvm::Value *Traits =
6497       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6498 
6499   llvm::Value *AllocatorVal =
6500       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6501                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6502                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6503   // Store to allocator.
6504   CGF.EmitVarDecl(*cast<VarDecl>(
6505       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6506   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6507   AllocatorVal =
6508       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6509                                Allocator->getType(), Allocator->getExprLoc());
6510   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6511 }
6512 
6513 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6514                                              const Expr *Allocator) {
6515   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6516   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6517   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6518   llvm::Value *AllocatorVal =
6519       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6520   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6521                                           CGF.getContext().VoidPtrTy,
6522                                           Allocator->getExprLoc());
6523   (void)CGF.EmitRuntimeCall(
6524       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6525                                             OMPRTL___kmpc_destroy_allocator),
6526       {ThreadId, AllocatorVal});
6527 }
6528 
/// Generates the outlined function for the target region of \p D and, when
/// \p IsOffloadEntry is set, creates the region ID and registers the offload
/// entry.
/// \param ParentName Name of the enclosing function; it becomes part of the
/// generated entry name.
/// \param OutlinedFn Receives the generated function. It is assigned only when
/// the outlined function is built, i.e. when compiling for the device or when
/// offloading is not mandatory.
/// \param OutlinedFnID Receives the region ID; assigned only when
/// \p IsOffloadEntry is true.
/// \param CodeGen Generator for the body of the target region.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  // The host-side outlined function is skipped only when offloading is
  // mandatory and we are not compiling for the device.
  const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
                               !CGM.getLangOpts().OpenMPOffloadMandatory;
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    // Device and file IDs are printed in hexadecimal, the line in decimal.
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  // NOTE(review): when BuildOutlinedFn is false, OutlinedFn is not assigned in
  // this function; the attribute code below checks it for null, so callers
  // presumably pass it in as nullptr - confirm.
  if (BuildOutlinedFn)
    OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // On the host the ID is a dedicated constant i8 global named after the
    // entry function with a "region_id" suffix.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // If we do not allow host fallback we still need a named address to use.
  llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
  if (!BuildOutlinedFn) {
    assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&
           "Named kernel already exists?");
    TargetRegionEntryAddr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::InternalLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function.
  // The attributes are added only when the helper reports a positive default
  // value and an outlined function exists.
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  if (BuildOutlinedFn)
    CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}
6628 
6629 /// Checks if the expression is constant or does not have non-trivial function
6630 /// calls.
6631 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6632   // We can skip constant expressions.
6633   // We can skip expressions with trivial calls or simple expressions.
6634   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6635           !E->hasNonTrivialCall(Ctx)) &&
6636          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6637 }
6638 
6639 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6640                                                     const Stmt *Body) {
6641   const Stmt *Child = Body->IgnoreContainers();
6642   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6643     Child = nullptr;
6644     for (const Stmt *S : C->body()) {
6645       if (const auto *E = dyn_cast<Expr>(S)) {
6646         if (isTrivial(Ctx, E))
6647           continue;
6648       }
6649       // Some of the statements can be ignored.
6650       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6651           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6652         continue;
6653       // Analyze declarations.
6654       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6655         if (llvm::all_of(DS->decls(), [](const Decl *D) {
6656               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6657                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6658                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6659                   isa<UsingDirectiveDecl>(D) ||
6660                   isa<OMPDeclareReductionDecl>(D) ||
6661                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6662                 return true;
6663               const auto *VD = dyn_cast<VarDecl>(D);
6664               if (!VD)
6665                 return false;
6666               return VD->hasGlobalStorage() || !VD->isUsed();
6667             }))
6668           continue;
6669       }
6670       // Found multiple children - cannot get the one child only.
6671       if (Child)
6672         return nullptr;
6673       Child = S;
6674     }
6675     if (Child)
6676       Child = Child->IgnoreContainers();
6677   }
6678   return Child;
6679 }
6680 
6681 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6682     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6683     int32_t &DefaultVal) {
6684 
6685   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6686   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6687          "Expected target-based executable directive.");
6688   switch (DirectiveKind) {
6689   case OMPD_target: {
6690     const auto *CS = D.getInnermostCapturedStmt();
6691     const auto *Body =
6692         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6693     const Stmt *ChildStmt =
6694         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6695     if (const auto *NestedDir =
6696             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6697       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6698         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6699           const Expr *NumTeams =
6700               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6701           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6702             if (auto Constant =
6703                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6704               DefaultVal = Constant->getExtValue();
6705           return NumTeams;
6706         }
6707         DefaultVal = 0;
6708         return nullptr;
6709       }
6710       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6711           isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6712         DefaultVal = 1;
6713         return nullptr;
6714       }
6715       DefaultVal = 1;
6716       return nullptr;
6717     }
6718     // A value of -1 is used to check if we need to emit no teams region
6719     DefaultVal = -1;
6720     return nullptr;
6721   }
6722   case OMPD_target_teams:
6723   case OMPD_target_teams_distribute:
6724   case OMPD_target_teams_distribute_simd:
6725   case OMPD_target_teams_distribute_parallel_for:
6726   case OMPD_target_teams_distribute_parallel_for_simd: {
6727     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6728       const Expr *NumTeams =
6729           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6730       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6731         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6732           DefaultVal = Constant->getExtValue();
6733       return NumTeams;
6734     }
6735     DefaultVal = 0;
6736     return nullptr;
6737   }
6738   case OMPD_target_parallel:
6739   case OMPD_target_parallel_for:
6740   case OMPD_target_parallel_for_simd:
6741   case OMPD_target_simd:
6742     DefaultVal = 1;
6743     return nullptr;
6744   case OMPD_parallel:
6745   case OMPD_for:
6746   case OMPD_parallel_for:
6747   case OMPD_parallel_master:
6748   case OMPD_parallel_sections:
6749   case OMPD_for_simd:
6750   case OMPD_parallel_for_simd:
6751   case OMPD_cancel:
6752   case OMPD_cancellation_point:
6753   case OMPD_ordered:
6754   case OMPD_threadprivate:
6755   case OMPD_allocate:
6756   case OMPD_task:
6757   case OMPD_simd:
6758   case OMPD_tile:
6759   case OMPD_unroll:
6760   case OMPD_sections:
6761   case OMPD_section:
6762   case OMPD_single:
6763   case OMPD_master:
6764   case OMPD_critical:
6765   case OMPD_taskyield:
6766   case OMPD_barrier:
6767   case OMPD_taskwait:
6768   case OMPD_taskgroup:
6769   case OMPD_atomic:
6770   case OMPD_flush:
6771   case OMPD_depobj:
6772   case OMPD_scan:
6773   case OMPD_teams:
6774   case OMPD_target_data:
6775   case OMPD_target_exit_data:
6776   case OMPD_target_enter_data:
6777   case OMPD_distribute:
6778   case OMPD_distribute_simd:
6779   case OMPD_distribute_parallel_for:
6780   case OMPD_distribute_parallel_for_simd:
6781   case OMPD_teams_distribute:
6782   case OMPD_teams_distribute_simd:
6783   case OMPD_teams_distribute_parallel_for:
6784   case OMPD_teams_distribute_parallel_for_simd:
6785   case OMPD_target_update:
6786   case OMPD_declare_simd:
6787   case OMPD_declare_variant:
6788   case OMPD_begin_declare_variant:
6789   case OMPD_end_declare_variant:
6790   case OMPD_declare_target:
6791   case OMPD_end_declare_target:
6792   case OMPD_declare_reduction:
6793   case OMPD_declare_mapper:
6794   case OMPD_taskloop:
6795   case OMPD_taskloop_simd:
6796   case OMPD_master_taskloop:
6797   case OMPD_master_taskloop_simd:
6798   case OMPD_parallel_master_taskloop:
6799   case OMPD_parallel_master_taskloop_simd:
6800   case OMPD_requires:
6801   case OMPD_metadirective:
6802   case OMPD_unknown:
6803     break;
6804   default:
6805     break;
6806   }
6807   llvm_unreachable("Unexpected directive kind.");
6808 }
6809 
6810 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6811     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6812   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6813          "Clauses associated with the teams directive expected to be emitted "
6814          "only for the host!");
6815   CGBuilderTy &Bld = CGF.Builder;
6816   int32_t DefaultNT = -1;
6817   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6818   if (NumTeams != nullptr) {
6819     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6820 
6821     switch (DirectiveKind) {
6822     case OMPD_target: {
6823       const auto *CS = D.getInnermostCapturedStmt();
6824       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6825       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6826       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6827                                                   /*IgnoreResultAssign*/ true);
6828       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6829                              /*isSigned=*/true);
6830     }
6831     case OMPD_target_teams:
6832     case OMPD_target_teams_distribute:
6833     case OMPD_target_teams_distribute_simd:
6834     case OMPD_target_teams_distribute_parallel_for:
6835     case OMPD_target_teams_distribute_parallel_for_simd: {
6836       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6837       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6838                                                   /*IgnoreResultAssign*/ true);
6839       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6840                              /*isSigned=*/true);
6841     }
6842     default:
6843       break;
6844     }
6845   } else if (DefaultNT == -1) {
6846     return nullptr;
6847   }
6848 
6849   return Bld.getInt32(DefaultNT);
6850 }
6851 
6852 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6853                                   llvm::Value *DefaultThreadLimitVal) {
6854   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6855       CGF.getContext(), CS->getCapturedStmt());
6856   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6857     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6858       llvm::Value *NumThreads = nullptr;
6859       llvm::Value *CondVal = nullptr;
6860       // Handle if clause. If if clause present, the number of threads is
6861       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6862       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6863         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6864         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6865         const OMPIfClause *IfClause = nullptr;
6866         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6867           if (C->getNameModifier() == OMPD_unknown ||
6868               C->getNameModifier() == OMPD_parallel) {
6869             IfClause = C;
6870             break;
6871           }
6872         }
6873         if (IfClause) {
6874           const Expr *Cond = IfClause->getCondition();
6875           bool Result;
6876           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6877             if (!Result)
6878               return CGF.Builder.getInt32(1);
6879           } else {
6880             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6881             if (const auto *PreInit =
6882                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6883               for (const auto *I : PreInit->decls()) {
6884                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6885                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6886                 } else {
6887                   CodeGenFunction::AutoVarEmission Emission =
6888                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6889                   CGF.EmitAutoVarCleanups(Emission);
6890                 }
6891               }
6892             }
6893             CondVal = CGF.EvaluateExprAsBool(Cond);
6894           }
6895         }
6896       }
6897       // Check the value of num_threads clause iff if clause was not specified
6898       // or is not evaluated to false.
6899       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6900         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6901         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6902         const auto *NumThreadsClause =
6903             Dir->getSingleClause<OMPNumThreadsClause>();
6904         CodeGenFunction::LexicalScope Scope(
6905             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6906         if (const auto *PreInit =
6907                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6908           for (const auto *I : PreInit->decls()) {
6909             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6910               CGF.EmitVarDecl(cast<VarDecl>(*I));
6911             } else {
6912               CodeGenFunction::AutoVarEmission Emission =
6913                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6914               CGF.EmitAutoVarCleanups(Emission);
6915             }
6916           }
6917         }
6918         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6919         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6920                                                /*isSigned=*/false);
6921         if (DefaultThreadLimitVal)
6922           NumThreads = CGF.Builder.CreateSelect(
6923               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6924               DefaultThreadLimitVal, NumThreads);
6925       } else {
6926         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6927                                            : CGF.Builder.getInt32(0);
6928       }
6929       // Process condition of the if clause.
6930       if (CondVal) {
6931         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6932                                               CGF.Builder.getInt32(1));
6933       }
6934       return NumThreads;
6935     }
6936     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6937       return CGF.Builder.getInt32(1);
6938     return DefaultThreadLimitVal;
6939   }
6940   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6941                                : CGF.Builder.getInt32(0);
6942 }
6943 
6944 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6945     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6946     int32_t &DefaultVal) {
6947   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6948   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6949          "Expected target-based executable directive.");
6950 
6951   switch (DirectiveKind) {
6952   case OMPD_target:
6953     // Teams have no clause thread_limit
6954     return nullptr;
6955   case OMPD_target_teams:
6956   case OMPD_target_teams_distribute:
6957     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6958       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6959       const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6960       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6961         if (auto Constant =
6962                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6963           DefaultVal = Constant->getExtValue();
6964       return ThreadLimit;
6965     }
6966     return nullptr;
6967   case OMPD_target_parallel:
6968   case OMPD_target_parallel_for:
6969   case OMPD_target_parallel_for_simd:
6970   case OMPD_target_teams_distribute_parallel_for:
6971   case OMPD_target_teams_distribute_parallel_for_simd: {
6972     Expr *ThreadLimit = nullptr;
6973     Expr *NumThreads = nullptr;
6974     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6975       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6976       ThreadLimit = ThreadLimitClause->getThreadLimit();
6977       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6978         if (auto Constant =
6979                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6980           DefaultVal = Constant->getExtValue();
6981     }
6982     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6983       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6984       NumThreads = NumThreadsClause->getNumThreads();
6985       if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6986         if (auto Constant =
6987                 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6988           if (Constant->getExtValue() < DefaultVal) {
6989             DefaultVal = Constant->getExtValue();
6990             ThreadLimit = NumThreads;
6991           }
6992         }
6993       }
6994     }
6995     return ThreadLimit;
6996   }
6997   case OMPD_target_teams_distribute_simd:
6998   case OMPD_target_simd:
6999     DefaultVal = 1;
7000     return nullptr;
7001   case OMPD_parallel:
7002   case OMPD_for:
7003   case OMPD_parallel_for:
7004   case OMPD_parallel_master:
7005   case OMPD_parallel_sections:
7006   case OMPD_for_simd:
7007   case OMPD_parallel_for_simd:
7008   case OMPD_cancel:
7009   case OMPD_cancellation_point:
7010   case OMPD_ordered:
7011   case OMPD_threadprivate:
7012   case OMPD_allocate:
7013   case OMPD_task:
7014   case OMPD_simd:
7015   case OMPD_tile:
7016   case OMPD_unroll:
7017   case OMPD_sections:
7018   case OMPD_section:
7019   case OMPD_single:
7020   case OMPD_master:
7021   case OMPD_critical:
7022   case OMPD_taskyield:
7023   case OMPD_barrier:
7024   case OMPD_taskwait:
7025   case OMPD_taskgroup:
7026   case OMPD_atomic:
7027   case OMPD_flush:
7028   case OMPD_depobj:
7029   case OMPD_scan:
7030   case OMPD_teams:
7031   case OMPD_target_data:
7032   case OMPD_target_exit_data:
7033   case OMPD_target_enter_data:
7034   case OMPD_distribute:
7035   case OMPD_distribute_simd:
7036   case OMPD_distribute_parallel_for:
7037   case OMPD_distribute_parallel_for_simd:
7038   case OMPD_teams_distribute:
7039   case OMPD_teams_distribute_simd:
7040   case OMPD_teams_distribute_parallel_for:
7041   case OMPD_teams_distribute_parallel_for_simd:
7042   case OMPD_target_update:
7043   case OMPD_declare_simd:
7044   case OMPD_declare_variant:
7045   case OMPD_begin_declare_variant:
7046   case OMPD_end_declare_variant:
7047   case OMPD_declare_target:
7048   case OMPD_end_declare_target:
7049   case OMPD_declare_reduction:
7050   case OMPD_declare_mapper:
7051   case OMPD_taskloop:
7052   case OMPD_taskloop_simd:
7053   case OMPD_master_taskloop:
7054   case OMPD_master_taskloop_simd:
7055   case OMPD_parallel_master_taskloop:
7056   case OMPD_parallel_master_taskloop_simd:
7057   case OMPD_requires:
7058   case OMPD_unknown:
7059     break;
7060   default:
7061     break;
7062   }
7063   llvm_unreachable("Unsupported directive kind.");
7064 }
7065 
7066 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
7067     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
7068   assert(!CGF.getLangOpts().OpenMPIsDevice &&
7069          "Clauses associated with the teams directive expected to be emitted "
7070          "only for the host!");
7071   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
7072   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
7073          "Expected target-based executable directive.");
7074   CGBuilderTy &Bld = CGF.Builder;
7075   llvm::Value *ThreadLimitVal = nullptr;
7076   llvm::Value *NumThreadsVal = nullptr;
7077   switch (DirectiveKind) {
7078   case OMPD_target: {
7079     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7080     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7081       return NumThreads;
7082     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7083         CGF.getContext(), CS->getCapturedStmt());
7084     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7085       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
7086         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
7087         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
7088         const auto *ThreadLimitClause =
7089             Dir->getSingleClause<OMPThreadLimitClause>();
7090         CodeGenFunction::LexicalScope Scope(
7091             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
7092         if (const auto *PreInit =
7093                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
7094           for (const auto *I : PreInit->decls()) {
7095             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
7096               CGF.EmitVarDecl(cast<VarDecl>(*I));
7097             } else {
7098               CodeGenFunction::AutoVarEmission Emission =
7099                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
7100               CGF.EmitAutoVarCleanups(Emission);
7101             }
7102           }
7103         }
7104         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7105             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7106         ThreadLimitVal =
7107             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7108       }
7109       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
7110           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
7111         CS = Dir->getInnermostCapturedStmt();
7112         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7113             CGF.getContext(), CS->getCapturedStmt());
7114         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
7115       }
7116       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
7117           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
7118         CS = Dir->getInnermostCapturedStmt();
7119         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7120           return NumThreads;
7121       }
7122       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
7123         return Bld.getInt32(1);
7124     }
7125     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7126   }
7127   case OMPD_target_teams: {
7128     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7129       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7130       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7131       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7132           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7133       ThreadLimitVal =
7134           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7135     }
7136     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7137     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7138       return NumThreads;
7139     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7140         CGF.getContext(), CS->getCapturedStmt());
7141     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7142       if (Dir->getDirectiveKind() == OMPD_distribute) {
7143         CS = Dir->getInnermostCapturedStmt();
7144         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7145           return NumThreads;
7146       }
7147     }
7148     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7149   }
7150   case OMPD_target_teams_distribute:
7151     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7152       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7153       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7154       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7155           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7156       ThreadLimitVal =
7157           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7158     }
7159     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
7160   case OMPD_target_parallel:
7161   case OMPD_target_parallel_for:
7162   case OMPD_target_parallel_for_simd:
7163   case OMPD_target_teams_distribute_parallel_for:
7164   case OMPD_target_teams_distribute_parallel_for_simd: {
7165     llvm::Value *CondVal = nullptr;
7166     // Handle if clause. If if clause present, the number of threads is
7167     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
7168     if (D.hasClausesOfKind<OMPIfClause>()) {
7169       const OMPIfClause *IfClause = nullptr;
7170       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
7171         if (C->getNameModifier() == OMPD_unknown ||
7172             C->getNameModifier() == OMPD_parallel) {
7173           IfClause = C;
7174           break;
7175         }
7176       }
7177       if (IfClause) {
7178         const Expr *Cond = IfClause->getCondition();
7179         bool Result;
7180         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
7181           if (!Result)
7182             return Bld.getInt32(1);
7183         } else {
7184           CodeGenFunction::RunCleanupsScope Scope(CGF);
7185           CondVal = CGF.EvaluateExprAsBool(Cond);
7186         }
7187       }
7188     }
7189     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7190       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7191       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7192       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7193           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7194       ThreadLimitVal =
7195           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7196     }
7197     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
7198       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
7199       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
7200       llvm::Value *NumThreads = CGF.EmitScalarExpr(
7201           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
7202       NumThreadsVal =
7203           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
7204       ThreadLimitVal = ThreadLimitVal
7205                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
7206                                                                 ThreadLimitVal),
7207                                               NumThreadsVal, ThreadLimitVal)
7208                            : NumThreadsVal;
7209     }
7210     if (!ThreadLimitVal)
7211       ThreadLimitVal = Bld.getInt32(0);
7212     if (CondVal)
7213       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
7214     return ThreadLimitVal;
7215   }
7216   case OMPD_target_teams_distribute_simd:
7217   case OMPD_target_simd:
7218     return Bld.getInt32(1);
7219   case OMPD_parallel:
7220   case OMPD_for:
7221   case OMPD_parallel_for:
7222   case OMPD_parallel_master:
7223   case OMPD_parallel_sections:
7224   case OMPD_for_simd:
7225   case OMPD_parallel_for_simd:
7226   case OMPD_cancel:
7227   case OMPD_cancellation_point:
7228   case OMPD_ordered:
7229   case OMPD_threadprivate:
7230   case OMPD_allocate:
7231   case OMPD_task:
7232   case OMPD_simd:
7233   case OMPD_tile:
7234   case OMPD_unroll:
7235   case OMPD_sections:
7236   case OMPD_section:
7237   case OMPD_single:
7238   case OMPD_master:
7239   case OMPD_critical:
7240   case OMPD_taskyield:
7241   case OMPD_barrier:
7242   case OMPD_taskwait:
7243   case OMPD_taskgroup:
7244   case OMPD_atomic:
7245   case OMPD_flush:
7246   case OMPD_depobj:
7247   case OMPD_scan:
7248   case OMPD_teams:
7249   case OMPD_target_data:
7250   case OMPD_target_exit_data:
7251   case OMPD_target_enter_data:
7252   case OMPD_distribute:
7253   case OMPD_distribute_simd:
7254   case OMPD_distribute_parallel_for:
7255   case OMPD_distribute_parallel_for_simd:
7256   case OMPD_teams_distribute:
7257   case OMPD_teams_distribute_simd:
7258   case OMPD_teams_distribute_parallel_for:
7259   case OMPD_teams_distribute_parallel_for_simd:
7260   case OMPD_target_update:
7261   case OMPD_declare_simd:
7262   case OMPD_declare_variant:
7263   case OMPD_begin_declare_variant:
7264   case OMPD_end_declare_variant:
7265   case OMPD_declare_target:
7266   case OMPD_end_declare_target:
7267   case OMPD_declare_reduction:
7268   case OMPD_declare_mapper:
7269   case OMPD_taskloop:
7270   case OMPD_taskloop_simd:
7271   case OMPD_master_taskloop:
7272   case OMPD_master_taskloop_simd:
7273   case OMPD_parallel_master_taskloop:
7274   case OMPD_parallel_master_taskloop_simd:
7275   case OMPD_requires:
7276   case OMPD_metadirective:
7277   case OMPD_unknown:
7278     break;
7279   default:
7280     break;
7281   }
7282   llvm_unreachable("Unsupported directive kind.");
7283 }
7284 
7285 namespace {
7286 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7287 
7288 // Utility to handle information from clauses associated with a given
7289 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7290 // It provides a convenient interface to obtain the information and generate
7291 // code for that information.
7292 class MappableExprsHandler {
7293 public:
7294   /// Values for bit flags used to specify the mapping type for
7295   /// offloading.
7296   enum OpenMPOffloadMappingFlags : uint64_t {
7297     /// No flags
7298     OMP_MAP_NONE = 0x0,
7299     /// Allocate memory on the device and move data from host to device.
7300     OMP_MAP_TO = 0x01,
7301     /// Allocate memory on the device and move data from device to host.
7302     OMP_MAP_FROM = 0x02,
7303     /// Always perform the requested mapping action on the element, even
7304     /// if it was already mapped before.
7305     OMP_MAP_ALWAYS = 0x04,
7306     /// Delete the element from the device environment, ignoring the
7307     /// current reference count associated with the element.
7308     OMP_MAP_DELETE = 0x08,
7309     /// The element being mapped is a pointer-pointee pair; both the
7310     /// pointer and the pointee should be mapped.
7311     OMP_MAP_PTR_AND_OBJ = 0x10,
7312     /// This flags signals that the base address of an entry should be
7313     /// passed to the target kernel as an argument.
7314     OMP_MAP_TARGET_PARAM = 0x20,
7315     /// Signal that the runtime library has to return the device pointer
7316     /// in the current position for the data being mapped. Used when we have the
7317     /// use_device_ptr or use_device_addr clause.
7318     OMP_MAP_RETURN_PARAM = 0x40,
7319     /// This flag signals that the reference being passed is a pointer to
7320     /// private data.
7321     OMP_MAP_PRIVATE = 0x80,
7322     /// Pass the element to the device by value.
7323     OMP_MAP_LITERAL = 0x100,
7324     /// Implicit map
7325     OMP_MAP_IMPLICIT = 0x200,
7326     /// Close is a hint to the runtime to allocate memory close to
7327     /// the target device.
7328     OMP_MAP_CLOSE = 0x400,
7329     /// 0x800 is reserved for compatibility with XLC.
7330     /// Produce a runtime error if the data is not already allocated.
7331     OMP_MAP_PRESENT = 0x1000,
7332     // Increment and decrement a separate reference counter so that the data
7333     // cannot be unmapped within the associated region.  Thus, this flag is
7334     // intended to be used on 'target' and 'target data' directives because they
7335     // are inherently structured.  It is not intended to be used on 'target
7336     // enter data' and 'target exit data' directives because they are inherently
7337     // dynamic.
7338     // This is an OpenMP extension for the sake of OpenACC support.
7339     OMP_MAP_OMPX_HOLD = 0x2000,
7340     /// Signal that the runtime library should use args as an array of
7341     /// descriptor_dim pointers and use args_size as dims. Used when we have
7342     /// non-contiguous list items in target update directive
7343     OMP_MAP_NON_CONTIG = 0x100000000000,
7344     /// The 16 MSBs of the flags indicate whether the entry is member of some
7345     /// struct/class.
7346     OMP_MAP_MEMBER_OF = 0xffff000000000000,
7347     LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
7348   };
7349 
7350   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7351   static unsigned getFlagMemberOffset() {
7352     unsigned Offset = 0;
7353     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7354          Remain = Remain >> 1)
7355       Offset++;
7356     return Offset;
7357   }
7358 
7359   /// Class that holds debugging information for a data mapping to be passed to
7360   /// the runtime library.
7361   class MappingExprInfo {
7362     /// The variable declaration used for the data mapping.
7363     const ValueDecl *MapDecl = nullptr;
7364     /// The original expression used in the map clause, or null if there is
7365     /// none.
7366     const Expr *MapExpr = nullptr;
7367 
7368   public:
7369     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7370         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7371 
7372     const ValueDecl *getMapDecl() const { return MapDecl; }
7373     const Expr *getMapExpr() const { return MapExpr; }
7374   };
7375 
7376   /// Class that associates information with a base pointer to be passed to the
7377   /// runtime library.
7378   class BasePointerInfo {
7379     /// The base pointer.
7380     llvm::Value *Ptr = nullptr;
7381     /// The base declaration that refers to this device pointer, or null if
7382     /// there is none.
7383     const ValueDecl *DevPtrDecl = nullptr;
7384 
7385   public:
7386     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7387         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7388     llvm::Value *operator*() const { return Ptr; }
7389     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7390     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7391   };
7392 
  // Container types used to collect per-entry mapping data. Each one is a
  // SmallVector declared with an inline capacity of four elements.

  /// List of debugging records (declaration and expression) of mappings.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  /// List of base pointers with their optional device pointer declaration.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// List of IR values; used below for section pointers and sizes.
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// List of map type flags.
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  /// List of user-defined mapper declarations.
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  /// List of dimension counts.
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  /// List of value lists; used below for the offsets, counts and strides of
  /// non-contiguous data.
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7400 
7401   /// This structure contains combined information generated for mappable
7402   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7403   /// mappers, and non-contiguous information.
7404   struct MapCombinedInfoTy {
7405     struct StructNonContiguousInfo {
7406       bool IsNonContiguous = false;
7407       MapDimArrayTy Dims;
7408       MapNonContiguousArrayTy Offsets;
7409       MapNonContiguousArrayTy Counts;
7410       MapNonContiguousArrayTy Strides;
7411     };
7412     MapExprsArrayTy Exprs;
7413     MapBaseValuesArrayTy BasePointers;
7414     MapValuesArrayTy Pointers;
7415     MapValuesArrayTy Sizes;
7416     MapFlagsArrayTy Types;
7417     MapMappersArrayTy Mappers;
7418     StructNonContiguousInfo NonContigInfo;
7419 
7420     /// Append arrays in \a CurInfo.
7421     void append(MapCombinedInfoTy &CurInfo) {
7422       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7423       BasePointers.append(CurInfo.BasePointers.begin(),
7424                           CurInfo.BasePointers.end());
7425       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7426       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7427       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7428       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7429       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7430                                  CurInfo.NonContigInfo.Dims.end());
7431       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7432                                     CurInfo.NonContigInfo.Offsets.end());
7433       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7434                                    CurInfo.NonContigInfo.Counts.end());
7435       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7436                                     CurInfo.NonContigInfo.Strides.end());
7437     }
7438   };
7439 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Mapping information associated with this struct.
    /// NOTE(review): presumably collected before the combined entry for the
    /// struct is emitted -- confirm against the users of this field.
    MapCombinedInfoTy PreliminaryMapData;
    /// Lowest mapped element as a (field index, address) pair. Defaults to
    /// index 0 with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element as a (field index, address) pair. Defaults to
    /// index 0 with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Invalid until set. NOTE(review): presumably the base address of the
    /// struct -- confirm.
    Address Base = Address::invalid();
    /// Invalid until set. NOTE(review): presumably a lower-bound address --
    /// confirm.
    Address LB = Address::invalid();
    /// NOTE(review): presumably set when the struct is reached through an
    /// array section -- confirm.
    bool IsArraySection = false;
    /// NOTE(review): presumably set when the whole record is mapped rather
    /// than only some of its members -- confirm.
    bool HasCompleteRecord = false;
  };
7455 
7456 private:
  /// Bundle of data describing one mappable expression: its component list,
  /// the map type, the map and motion modifiers, an optional user-defined
  /// mapper, the referencing expression, and a few flags.
  ///
  /// The modifier lists are held as ArrayRef, which does not own its
  /// elements, so the referenced storage has to outlive this object.
  struct MapInfo {
    /// Components of the mappable expression.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type; OMPC_MAP_unknown until set.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Motion modifiers.
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// NOTE(review): presumably requests that the device pointer be returned
    /// for this entry -- confirm against the users of this flag.
    bool ReturnDevicePointer = false;
    /// Whether the mapping is implicit.
    bool IsImplicit = false;
    /// User-defined mapper declaration, or null.
    const ValueDecl *Mapper = nullptr;
    /// Expression associated with the entry, or null.
    /// NOTE(review): presumably the expression as written in the clause --
    /// confirm.
    const Expr *VarRef = nullptr;
    /// NOTE(review): presumably distinguishes use_device_addr from
    /// use_device_ptr handling -- confirm.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    /// Initialize every member from the corresponding argument.
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7483 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression of the deferred entry.
    /// NOTE(review): presumably the clause item expression -- confirm.
    const Expr *IE = nullptr;
    /// Declaration of the deferred entry.
    const ValueDecl *VD = nullptr;
    /// NOTE(review): presumably true for use_device_addr and false for
    /// use_device_ptr -- confirm against the code that creates entries.
    bool ForDeviceAddr = false;

    /// Initialize every member from the corresponding argument.
    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7496 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and the map clause recorded for them.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7520 
7521   llvm::Value *getExprTypeSize(const Expr *E) const {
7522     QualType ExprTy = E->getType().getCanonicalType();
7523 
7524     // Calculate the size for array shaping expression.
7525     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7526       llvm::Value *Size =
7527           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7528       for (const Expr *SE : OAE->getDimensions()) {
7529         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7530         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7531                                       CGF.getContext().getSizeType(),
7532                                       SE->getExprLoc());
7533         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7534       }
7535       return Size;
7536     }
7537 
7538     // Reference types are ignored for mapping purposes.
7539     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7540       ExprTy = RefTy->getPointeeType().getCanonicalType();
7541 
7542     // Given that an array section is considered a built-in type, we need to
7543     // do the calculation based on the length of the section instead of relying
7544     // on CGF.getTypeSize(E->getType()).
7545     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7546       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7547                             OAE->getBase()->IgnoreParenImpCasts())
7548                             .getCanonicalType();
7549 
7550       // If there is no length associated with the expression and lower bound is
7551       // not specified too, that means we are using the whole length of the
7552       // base.
7553       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7554           !OAE->getLowerBound())
7555         return CGF.getTypeSize(BaseTy);
7556 
7557       llvm::Value *ElemSize;
7558       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7559         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7560       } else {
7561         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7562         assert(ATy && "Expecting array type if not a pointer type.");
7563         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7564       }
7565 
7566       // If we don't have a length at this point, that is because we have an
7567       // array section with a single element.
7568       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7569         return ElemSize;
7570 
7571       if (const Expr *LenExpr = OAE->getLength()) {
7572         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7573         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7574                                              CGF.getContext().getSizeType(),
7575                                              LenExpr->getExprLoc());
7576         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7577       }
7578       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7579              OAE->getLowerBound() && "expected array_section[lb:].");
7580       // Size = sizetype - lb * elemtype;
7581       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7582       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7583       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7584                                        CGF.getContext().getSizeType(),
7585                                        OAE->getLowerBound()->getExprLoc());
7586       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7587       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7588       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7589       LengthVal = CGF.Builder.CreateSelect(
7590           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7591       return LengthVal;
7592     }
7593     return CGF.getTypeSize(ExprTy);
7594   }
7595 
7596   /// Return the corresponding bits for a given map clause modifier. Add
7597   /// a flag marking the map as a pointer if requested. Add a flag marking the
7598   /// map as the first one of a series of maps that relate to the same map
7599   /// expression.
7600   OpenMPOffloadMappingFlags getMapTypeBits(
7601       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7602       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7603       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7604     OpenMPOffloadMappingFlags Bits =
7605         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7606     switch (MapType) {
7607     case OMPC_MAP_alloc:
7608     case OMPC_MAP_release:
7609       // alloc and release is the default behavior in the runtime library,  i.e.
7610       // if we don't pass any bits alloc/release that is what the runtime is
7611       // going to do. Therefore, we don't need to signal anything for these two
7612       // type modifiers.
7613       break;
7614     case OMPC_MAP_to:
7615       Bits |= OMP_MAP_TO;
7616       break;
7617     case OMPC_MAP_from:
7618       Bits |= OMP_MAP_FROM;
7619       break;
7620     case OMPC_MAP_tofrom:
7621       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7622       break;
7623     case OMPC_MAP_delete:
7624       Bits |= OMP_MAP_DELETE;
7625       break;
7626     case OMPC_MAP_unknown:
7627       llvm_unreachable("Unexpected map type!");
7628     }
7629     if (AddPtrFlag)
7630       Bits |= OMP_MAP_PTR_AND_OBJ;
7631     if (AddIsTargetParamFlag)
7632       Bits |= OMP_MAP_TARGET_PARAM;
7633     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7634       Bits |= OMP_MAP_ALWAYS;
7635     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7636       Bits |= OMP_MAP_CLOSE;
7637     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7638         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7639       Bits |= OMP_MAP_PRESENT;
7640     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7641       Bits |= OMP_MAP_OMPX_HOLD;
7642     if (IsNonContiguous)
7643       Bits |= OMP_MAP_NON_CONTIG;
7644     return Bits;
7645   }
7646 
7647   /// Return true if the provided expression is a final array section. A
7648   /// final array section, is one whose length can't be proved to be one.
7649   bool isFinalArraySectionExpression(const Expr *E) const {
7650     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7651 
7652     // It is not an array section and therefore not a unity-size one.
7653     if (!OASE)
7654       return false;
7655 
7656     // An array section with no colon always refer to a single element.
7657     if (OASE->getColonLocFirst().isInvalid())
7658       return false;
7659 
7660     const Expr *Length = OASE->getLength();
7661 
7662     // If we don't have a length we have to check if the array has size 1
7663     // for this dimension. Also, we should always expect a length if the
7664     // base type is pointer.
7665     if (!Length) {
7666       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7667                              OASE->getBase()->IgnoreParenImpCasts())
7668                              .getCanonicalType();
7669       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7670         return ATy->getSize().getSExtValue() != 1;
7671       // If we don't have a constant dimension length, we have to consider
7672       // the current section as having any size, so it is not necessarily
7673       // unitary. If it happen to be unity size, that's user fault.
7674       return true;
7675     }
7676 
7677     // Check if the length evaluates to 1.
7678     Expr::EvalResult Result;
7679     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7680       return true; // Can have more that size 1.
7681 
7682     llvm::APSInt ConstLength = Result.Val.getInt();
7683     return ConstLength.getSExtValue() != 1;
7684   }
7685 
7686   /// Generate the base pointers, section pointers, sizes, map type bits, and
7687   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7688   /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
7690   /// components is the first associated with a capture.
7691   void generateInfoForComponentList(
7692       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7693       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7694       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7695       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7696       bool IsFirstComponentList, bool IsImplicit,
7697       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7698       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7699       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7700           OverlappedElements = llvm::None) const {
7701     // The following summarizes what has to be generated for each map and the
7702     // types below. The generated information is expressed in this order:
7703     // base pointer, section pointer, size, flags
7704     // (to add to the ones that come from the map type and modifier).
7705     //
7706     // double d;
7707     // int i[100];
7708     // float *p;
7709     //
7710     // struct S1 {
7711     //   int i;
7712     //   float f[50];
7713     // }
7714     // struct S2 {
7715     //   int i;
7716     //   float f[50];
7717     //   S1 s;
7718     //   double *p;
7719     //   struct S2 *ps;
7720     //   int &ref;
7721     // }
7722     // S2 s;
7723     // S2 *ps;
7724     //
7725     // map(d)
7726     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7727     //
7728     // map(i)
7729     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7730     //
7731     // map(i[1:23])
7732     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7733     //
7734     // map(p)
7735     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7736     //
7737     // map(p[1:24])
7738     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7739     // in unified shared memory mode or for local pointers
7740     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7741     //
7742     // map(s)
7743     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7744     //
7745     // map(s.i)
7746     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7747     //
7748     // map(s.s.f)
7749     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7750     //
7751     // map(s.p)
7752     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7753     //
7754     // map(to: s.p[:22])
7755     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7756     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7757     // &(s.p), &(s.p[0]), 22*sizeof(double),
7758     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7759     // (*) alloc space for struct members, only this is a target parameter
7760     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7761     //      optimizes this entry out, same in the examples below)
7762     // (***) map the pointee (map: to)
7763     //
7764     // map(to: s.ref)
7765     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7766     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7767     // (*) alloc space for struct members, only this is a target parameter
7768     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7769     //      optimizes this entry out, same in the examples below)
7770     // (***) map the pointee (map: to)
7771     //
7772     // map(s.ps)
7773     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7774     //
7775     // map(from: s.ps->s.i)
7776     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7777     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7778     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7779     //
7780     // map(to: s.ps->ps)
7781     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7782     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7783     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7784     //
7785     // map(s.ps->ps->ps)
7786     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7787     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7788     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7789     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7790     //
7791     // map(to: s.ps->ps->s.f[:22])
7792     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7793     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7794     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7795     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7796     //
7797     // map(ps)
7798     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7799     //
7800     // map(ps->i)
7801     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7802     //
7803     // map(ps->s.f)
7804     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7805     //
7806     // map(from: ps->p)
7807     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7808     //
7809     // map(to: ps->p[:22])
7810     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7811     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7812     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7813     //
7814     // map(ps->ps)
7815     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7816     //
7817     // map(from: ps->ps->s.i)
7818     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7819     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7820     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7821     //
7822     // map(from: ps->ps->ps)
7823     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7824     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7825     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7826     //
7827     // map(ps->ps->ps->ps)
7828     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7829     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7830     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7831     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7832     //
7833     // map(to: ps->ps->ps->s.f[:22])
7834     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7835     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7836     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7837     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7838     //
7839     // map(to: s.f[:22]) map(from: s.p[:33])
7840     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
7842     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7843     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7844     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7845     // (*) allocate contiguous space needed to fit all mapped members even if
7846     //     we allocate space for members not mapped (in this example,
7847     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7848     //     them as well because they fall between &s.f[0] and &s.p)
7849     //
7850     // map(from: s.f[:22]) map(to: ps->p[:33])
7851     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7852     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7853     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7854     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7855     // (*) the struct this entry pertains to is the 2nd element in the list of
7856     //     arguments, hence MEMBER_OF(2)
7857     //
7858     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7859     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7860     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7861     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7862     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7863     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7864     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7865     // (*) the struct this entry pertains to is the 4th element in the list
7866     //     of arguments, hence MEMBER_OF(4)
7867 
7868     // Track if the map information being generated is the first for a capture.
7869     bool IsCaptureFirstInfo = IsFirstComponentList;
7870     // When the variable is on a declare target link or in a to clause with
7871     // unified memory, a reference is needed to hold the host/device address
7872     // of the variable.
7873     bool RequiresReference = false;
7874 
7875     // Scan the components from the base to the complete expression.
7876     auto CI = Components.rbegin();
7877     auto CE = Components.rend();
7878     auto I = CI;
7879 
7880     // Track if the map information being generated is the first for a list of
7881     // components.
7882     bool IsExpressionFirstInfo = true;
7883     bool FirstPointerInComplexData = false;
7884     Address BP = Address::invalid();
7885     const Expr *AssocExpr = I->getAssociatedExpression();
7886     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7887     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7888     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7889 
7890     if (isa<MemberExpr>(AssocExpr)) {
7891       // The base is the 'this' pointer. The content of the pointer is going
7892       // to be the base of the field being mapped.
7893       BP = CGF.LoadCXXThisAddress();
7894     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7895                (OASE &&
7896                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7897       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7898     } else if (OAShE &&
7899                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7900       BP = Address::deprecated(
7901           CGF.EmitScalarExpr(OAShE->getBase()),
7902           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7903     } else {
7904       // The base is the reference to the variable.
7905       // BP = &Var.
7906       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7907       if (const auto *VD =
7908               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7909         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7910                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7911           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7912               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7913                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7914             RequiresReference = true;
7915             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7916           }
7917         }
7918       }
7919 
7920       // If the variable is a pointer and is being dereferenced (i.e. is not
7921       // the last component), the base has to be the pointer itself, not its
7922       // reference. References are ignored for mapping purposes.
7923       QualType Ty =
7924           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7925       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7926         // No need to generate individual map information for the pointer, it
7927         // can be associated with the combined storage if shared memory mode is
7928         // active or the base declaration is not global variable.
7929         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7930         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7931             !VD || VD->hasLocalStorage())
7932           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7933         else
7934           FirstPointerInComplexData = true;
7935         ++I;
7936       }
7937     }
7938 
7939     // Track whether a component of the list should be marked as MEMBER_OF some
7940     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7941     // in a component list should be marked as MEMBER_OF, all subsequent entries
7942     // do not belong to the base struct. E.g.
7943     // struct S2 s;
7944     // s.ps->ps->ps->f[:]
7945     //   (1) (2) (3) (4)
7946     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7947     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7948     // is the pointee of ps(2) which is not member of struct s, so it should not
7949     // be marked as such (it is still PTR_AND_OBJ).
7950     // The variable is initialized to false so that PTR_AND_OBJ entries which
7951     // are not struct members are not considered (e.g. array of pointers to
7952     // data).
7953     bool ShouldBeMemberOf = false;
7954 
7955     // Variable keeping track of whether or not we have encountered a component
7956     // in the component list which is a member expression. Useful when we have a
7957     // pointer or a final array section, in which case it is the previous
7958     // component in the list which tells us whether we have a member expression.
7959     // E.g. X.f[:]
7960     // While processing the final array section "[:]" it is "f" which tells us
7961     // whether we are dealing with a member of a declared struct.
7962     const MemberExpr *EncounteredME = nullptr;
7963 
7964     // Track for the total number of dimension. Start from one for the dummy
7965     // dimension.
7966     uint64_t DimSize = 1;
7967 
7968     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7969     bool IsPrevMemberReference = false;
7970 
7971     for (; I != CE; ++I) {
7972       // If the current component is member of a struct (parent struct) mark it.
7973       if (!EncounteredME) {
7974         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7975         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7976         // as MEMBER_OF the parent struct.
7977         if (EncounteredME) {
7978           ShouldBeMemberOf = true;
7979           // Do not emit as complex pointer if this is actually not array-like
7980           // expression.
7981           if (FirstPointerInComplexData) {
7982             QualType Ty = std::prev(I)
7983                               ->getAssociatedDeclaration()
7984                               ->getType()
7985                               .getNonReferenceType();
7986             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7987             FirstPointerInComplexData = false;
7988           }
7989         }
7990       }
7991 
7992       auto Next = std::next(I);
7993 
7994       // We need to generate the addresses and sizes if this is the last
7995       // component, if the component is a pointer or if it is an array section
7996       // whose length can't be proved to be one. If this is a pointer, it
7997       // becomes the base address for the following components.
7998 
7999       // A final array section, is one whose length can't be proved to be one.
8000       // If the map item is non-contiguous then we don't treat any array section
8001       // as final array section.
8002       bool IsFinalArraySection =
8003           !IsNonContiguous &&
8004           isFinalArraySectionExpression(I->getAssociatedExpression());
8005 
8006       // If we have a declaration for the mapping use that, otherwise use
8007       // the base declaration of the map clause.
8008       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
8009                                      ? I->getAssociatedDeclaration()
8010                                      : BaseDecl;
8011       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
8012                                                : MapExpr;
8013 
8014       // Get information on whether the element is a pointer. Have to do a
8015       // special treatment for array sections given that they are built-in
8016       // types.
8017       const auto *OASE =
8018           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
8019       const auto *OAShE =
8020           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
8021       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
8022       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8023       bool IsPointer =
8024           OAShE ||
8025           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
8026                        .getCanonicalType()
8027                        ->isAnyPointerType()) ||
8028           I->getAssociatedExpression()->getType()->isAnyPointerType();
8029       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
8030                                MapDecl &&
8031                                MapDecl->getType()->isLValueReferenceType();
8032       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
8033 
8034       if (OASE)
8035         ++DimSize;
8036 
8037       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8038           IsFinalArraySection) {
8039         // If this is not the last component, we expect the pointer to be
8040         // associated with an array expression or member expression.
8041         assert((Next == CE ||
8042                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8043                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8044                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
8045                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8046                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8047                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8048                "Unexpected expression");
8049 
8050         Address LB = Address::invalid();
8051         Address LowestElem = Address::invalid();
8052         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8053                                        const MemberExpr *E) {
8054           const Expr *BaseExpr = E->getBase();
8055           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
8056           // scalar.
8057           LValue BaseLV;
8058           if (E->isArrow()) {
8059             LValueBaseInfo BaseInfo;
8060             TBAAAccessInfo TBAAInfo;
8061             Address Addr =
8062                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8063             QualType PtrTy = BaseExpr->getType()->getPointeeType();
8064             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8065           } else {
8066             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8067           }
8068           return BaseLV;
8069         };
8070         if (OAShE) {
8071           LowestElem = LB =
8072               Address::deprecated(CGF.EmitScalarExpr(OAShE->getBase()),
8073                                   CGF.getContext().getTypeAlignInChars(
8074                                       OAShE->getBase()->getType()));
8075         } else if (IsMemberReference) {
8076           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8077           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8078           LowestElem = CGF.EmitLValueForFieldInitialization(
8079                               BaseLVal, cast<FieldDecl>(MapDecl))
8080                            .getAddress(CGF);
8081           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8082                    .getAddress(CGF);
8083         } else {
8084           LowestElem = LB =
8085               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8086                   .getAddress(CGF);
8087         }
8088 
8089         // If this component is a pointer inside the base struct then we don't
8090         // need to create any entry for it - it will be combined with the object
8091         // it is pointing to into a single PTR_AND_OBJ entry.
8092         bool IsMemberPointerOrAddr =
8093             EncounteredME &&
8094             (((IsPointer || ForDeviceAddr) &&
8095               I->getAssociatedExpression() == EncounteredME) ||
8096              (IsPrevMemberReference && !IsPointer) ||
8097              (IsMemberReference && Next != CE &&
8098               !Next->getAssociatedExpression()->getType()->isPointerType()));
8099         if (!OverlappedElements.empty() && Next == CE) {
8100           // Handle base element with the info for overlapped elements.
8101           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8102           assert(!IsPointer &&
8103                  "Unexpected base element with the pointer type.");
8104           // Mark the whole struct as the struct that requires allocation on the
8105           // device.
8106           PartialStruct.LowestElem = {0, LowestElem};
8107           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8108               I->getAssociatedExpression()->getType());
8109           Address HB = CGF.Builder.CreateConstGEP(
8110               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
8111                                                               CGF.VoidPtrTy),
8112               TypeSize.getQuantity() - 1);
8113           PartialStruct.HighestElem = {
8114               std::numeric_limits<decltype(
8115                   PartialStruct.HighestElem.first)>::max(),
8116               HB};
8117           PartialStruct.Base = BP;
8118           PartialStruct.LB = LB;
8119           assert(
8120               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8121               "Overlapped elements must be used only once for the variable.");
8122           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8123           // Emit data for non-overlapped data.
8124           OpenMPOffloadMappingFlags Flags =
8125               OMP_MAP_MEMBER_OF |
8126               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8127                              /*AddPtrFlag=*/false,
8128                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8129           llvm::Value *Size = nullptr;
8130           // Do bitcopy of all non-overlapped structure elements.
8131           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8132                    Component : OverlappedElements) {
8133             Address ComponentLB = Address::invalid();
8134             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8135                  Component) {
8136               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8137                 const auto *FD = dyn_cast<FieldDecl>(VD);
8138                 if (FD && FD->getType()->isLValueReferenceType()) {
8139                   const auto *ME =
8140                       cast<MemberExpr>(MC.getAssociatedExpression());
8141                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8142                   ComponentLB =
8143                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8144                           .getAddress(CGF);
8145                 } else {
8146                   ComponentLB =
8147                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8148                           .getAddress(CGF);
8149                 }
8150                 Size = CGF.Builder.CreatePtrDiff(
8151                     CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8152                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8153                 break;
8154               }
8155             }
8156             assert(Size && "Failed to determine structure size");
8157             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8158             CombinedInfo.BasePointers.push_back(BP.getPointer());
8159             CombinedInfo.Pointers.push_back(LB.getPointer());
8160             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8161                 Size, CGF.Int64Ty, /*isSigned=*/true));
8162             CombinedInfo.Types.push_back(Flags);
8163             CombinedInfo.Mappers.push_back(nullptr);
8164             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8165                                                                       : 1);
8166             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8167           }
8168           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8169           CombinedInfo.BasePointers.push_back(BP.getPointer());
8170           CombinedInfo.Pointers.push_back(LB.getPointer());
8171           Size = CGF.Builder.CreatePtrDiff(
8172               CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8173               CGF.EmitCastToVoidPtr(LB.getPointer()));
8174           CombinedInfo.Sizes.push_back(
8175               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8176           CombinedInfo.Types.push_back(Flags);
8177           CombinedInfo.Mappers.push_back(nullptr);
8178           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8179                                                                     : 1);
8180           break;
8181         }
8182         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8183         if (!IsMemberPointerOrAddr ||
8184             (Next == CE && MapType != OMPC_MAP_unknown)) {
8185           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8186           CombinedInfo.BasePointers.push_back(BP.getPointer());
8187           CombinedInfo.Pointers.push_back(LB.getPointer());
8188           CombinedInfo.Sizes.push_back(
8189               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8190           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8191                                                                     : 1);
8192 
8193           // If Mapper is valid, the last component inherits the mapper.
8194           bool HasMapper = Mapper && Next == CE;
8195           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8196 
8197           // We need to add a pointer flag for each map that comes from the
8198           // same expression except for the first one. We also need to signal
8199           // this map is the first one that relates with the current capture
8200           // (there is a set of entries for each capture).
8201           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8202               MapType, MapModifiers, MotionModifiers, IsImplicit,
8203               !IsExpressionFirstInfo || RequiresReference ||
8204                   FirstPointerInComplexData || IsMemberReference,
8205               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8206 
8207           if (!IsExpressionFirstInfo || IsMemberReference) {
8208             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8209             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8210             if (IsPointer || (IsMemberReference && Next != CE))
8211               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8212                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8213 
8214             if (ShouldBeMemberOf) {
8215               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8216               // should be later updated with the correct value of MEMBER_OF.
8217               Flags |= OMP_MAP_MEMBER_OF;
8218               // From now on, all subsequent PTR_AND_OBJ entries should not be
8219               // marked as MEMBER_OF.
8220               ShouldBeMemberOf = false;
8221             }
8222           }
8223 
8224           CombinedInfo.Types.push_back(Flags);
8225         }
8226 
8227         // If we have encountered a member expression so far, keep track of the
8228         // mapped member. If the parent is "*this", then the value declaration
8229         // is nullptr.
8230         if (EncounteredME) {
8231           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8232           unsigned FieldIndex = FD->getFieldIndex();
8233 
8234           // Update info about the lowest and highest elements for this struct
8235           if (!PartialStruct.Base.isValid()) {
8236             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8237             if (IsFinalArraySection) {
8238               Address HB =
8239                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8240                       .getAddress(CGF);
8241               PartialStruct.HighestElem = {FieldIndex, HB};
8242             } else {
8243               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8244             }
8245             PartialStruct.Base = BP;
8246             PartialStruct.LB = BP;
8247           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8248             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8249           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8250             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8251           }
8252         }
8253 
8254         // Need to emit combined struct for array sections.
8255         if (IsFinalArraySection || IsNonContiguous)
8256           PartialStruct.IsArraySection = true;
8257 
8258         // If we have a final array section, we are done with this expression.
8259         if (IsFinalArraySection)
8260           break;
8261 
8262         // The pointer becomes the base for the next element.
8263         if (Next != CE)
8264           BP = IsMemberReference ? LowestElem : LB;
8265 
8266         IsExpressionFirstInfo = false;
8267         IsCaptureFirstInfo = false;
8268         FirstPointerInComplexData = false;
8269         IsPrevMemberReference = IsMemberReference;
8270       } else if (FirstPointerInComplexData) {
8271         QualType Ty = Components.rbegin()
8272                           ->getAssociatedDeclaration()
8273                           ->getType()
8274                           .getNonReferenceType();
8275         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8276         FirstPointerInComplexData = false;
8277       }
8278     }
8279     // If ran into the whole component - allocate the space for the whole
8280     // record.
8281     if (!EncounteredME)
8282       PartialStruct.HasCompleteRecord = true;
8283 
8284     if (!IsNonContiguous)
8285       return;
8286 
8287     const ASTContext &Context = CGF.getContext();
8288 
8289     // For supporting stride in array section, we need to initialize the first
8290     // dimension size as 1, first offset as 0, and first count as 1
8291     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8292     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8293     MapValuesArrayTy CurStrides;
8294     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8295     uint64_t ElementTypeSize;
8296 
    // Collect Size information for each dimension and get the element size as
    // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
8300     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8301          Components) {
8302       const Expr *AssocExpr = Component.getAssociatedExpression();
8303       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8304 
8305       if (!OASE)
8306         continue;
8307 
8308       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8309       auto *CAT = Context.getAsConstantArrayType(Ty);
8310       auto *VAT = Context.getAsVariableArrayType(Ty);
8311 
8312       // We need all the dimension size except for the last dimension.
8313       assert((VAT || CAT || &Component == &*Components.begin()) &&
8314              "Should be either ConstantArray or VariableArray if not the "
8315              "first Component");
8316 
8317       // Get element size if CurStrides is empty.
8318       if (CurStrides.empty()) {
8319         const Type *ElementType = nullptr;
8320         if (CAT)
8321           ElementType = CAT->getElementType().getTypePtr();
8322         else if (VAT)
8323           ElementType = VAT->getElementType().getTypePtr();
8324         else
8325           assert(&Component == &*Components.begin() &&
8326                  "Only expect pointer (non CAT or VAT) when this is the "
8327                  "first Component");
8328         // If ElementType is null, then it means the base is a pointer
8329         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8330         // for next iteration.
8331         if (ElementType) {
8332           // For the case that having pointer as base, we need to remove one
8333           // level of indirection.
8334           if (&Component != &*Components.begin())
8335             ElementType = ElementType->getPointeeOrArrayElementType();
8336           ElementTypeSize =
8337               Context.getTypeSizeInChars(ElementType).getQuantity();
8338           CurStrides.push_back(
8339               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8340         }
8341       }
8342       // Get dimension value except for the last dimension since we don't need
8343       // it.
8344       if (DimSizes.size() < Components.size() - 1) {
8345         if (CAT)
8346           DimSizes.push_back(llvm::ConstantInt::get(
8347               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8348         else if (VAT)
8349           DimSizes.push_back(CGF.Builder.CreateIntCast(
8350               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8351               /*IsSigned=*/false));
8352       }
8353     }
8354 
    // Skip the dummy dimension since we already have its information.
8356     auto *DI = DimSizes.begin() + 1;
8357     // Product of dimension.
8358     llvm::Value *DimProd =
8359         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8360 
8361     // Collect info for non-contiguous. Notice that offset, count, and stride
8362     // are only meaningful for array-section, so we insert a null for anything
8363     // other than array-section.
8364     // Also, the size of offset, count, and stride are not the same as
8365     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8366     // count, and stride are the same as the number of non-contiguous
8367     // declaration in target update to/from clause.
8368     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8369          Components) {
8370       const Expr *AssocExpr = Component.getAssociatedExpression();
8371 
8372       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8373         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8374             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8375             /*isSigned=*/false);
8376         CurOffsets.push_back(Offset);
8377         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8378         CurStrides.push_back(CurStrides.back());
8379         continue;
8380       }
8381 
8382       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8383 
8384       if (!OASE)
8385         continue;
8386 
8387       // Offset
8388       const Expr *OffsetExpr = OASE->getLowerBound();
8389       llvm::Value *Offset = nullptr;
8390       if (!OffsetExpr) {
8391         // If offset is absent, then we just set it to zero.
8392         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8393       } else {
8394         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8395                                            CGF.Int64Ty,
8396                                            /*isSigned=*/false);
8397       }
8398       CurOffsets.push_back(Offset);
8399 
8400       // Count
8401       const Expr *CountExpr = OASE->getLength();
8402       llvm::Value *Count = nullptr;
8403       if (!CountExpr) {
8404         // In Clang, once a high dimension is an array section, we construct all
8405         // the lower dimension as array section, however, for case like
8406         // arr[0:2][2], Clang construct the inner dimension as an array section
8407         // but it actually is not in an array section form according to spec.
8408         if (!OASE->getColonLocFirst().isValid() &&
8409             !OASE->getColonLocSecond().isValid()) {
8410           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8411         } else {
8412           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8413           // When the length is absent it defaults to ⌈(size −
8414           // lower-bound)/stride⌉, where size is the size of the array
8415           // dimension.
8416           const Expr *StrideExpr = OASE->getStride();
8417           llvm::Value *Stride =
8418               StrideExpr
8419                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8420                                               CGF.Int64Ty, /*isSigned=*/false)
8421                   : nullptr;
8422           if (Stride)
8423             Count = CGF.Builder.CreateUDiv(
8424                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8425           else
8426             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8427         }
8428       } else {
8429         Count = CGF.EmitScalarExpr(CountExpr);
8430       }
8431       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8432       CurCounts.push_back(Count);
8433 
8434       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8435       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8436       //              Offset      Count     Stride
8437       //    D0          0           1         4    (int)    <- dummy dimension
8438       //    D1          0           2         8    (2 * (1) * 4)
8439       //    D2          1           2         20   (1 * (1 * 5) * 4)
8440       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8441       const Expr *StrideExpr = OASE->getStride();
8442       llvm::Value *Stride =
8443           StrideExpr
8444               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8445                                           CGF.Int64Ty, /*isSigned=*/false)
8446               : nullptr;
8447       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8448       if (Stride)
8449         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8450       else
8451         CurStrides.push_back(DimProd);
8452       if (DI != DimSizes.end())
8453         ++DI;
8454     }
8455 
8456     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8457     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8458     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8459   }
8460 
8461   /// Return the adjusted map modifiers if the declaration a capture refers to
8462   /// appears in a first-private clause. This is expected to be used only with
8463   /// directives that start with 'target'.
8464   MappableExprsHandler::OpenMPOffloadMappingFlags
8465   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8466     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8467 
8468     // A first private variable captured by reference will use only the
8469     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8470     // declaration is known as first-private in this handler.
8471     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8472       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8473         return MappableExprsHandler::OMP_MAP_TO |
8474                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8475       return MappableExprsHandler::OMP_MAP_PRIVATE |
8476              MappableExprsHandler::OMP_MAP_TO;
8477     }
8478     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8479     if (I != LambdasMap.end())
8480       // for map(to: lambda): using user specified map type.
8481       return getMapTypeBits(
8482           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8483           /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8484           /*AddPtrFlag=*/false,
8485           /*AddIsTargetParamFlag=*/false,
8486           /*isNonContiguous=*/false);
8487     return MappableExprsHandler::OMP_MAP_TO |
8488            MappableExprsHandler::OMP_MAP_FROM;
8489   }
8490 
8491   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8492     // Rotate by getFlagMemberOffset() bits.
8493     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8494                                                   << getFlagMemberOffset());
8495   }
8496 
8497   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8498                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8499     // If the entry is PTR_AND_OBJ but has not been marked with the special
8500     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8501     // marked as MEMBER_OF.
8502     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8503         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8504       return;
8505 
8506     // Reset the placeholder value to prepare the flag for the assignment of the
8507     // proper MEMBER_OF value.
8508     Flags &= ~OMP_MAP_MEMBER_OF;
8509     Flags |= MemberOfFlag;
8510   }
8511 
8512   void getPlainLayout(const CXXRecordDecl *RD,
8513                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8514                       bool AsBase) const {
8515     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8516 
8517     llvm::StructType *St =
8518         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8519 
8520     unsigned NumElements = St->getNumElements();
8521     llvm::SmallVector<
8522         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8523         RecordLayout(NumElements);
8524 
8525     // Fill bases.
8526     for (const auto &I : RD->bases()) {
8527       if (I.isVirtual())
8528         continue;
8529       const auto *Base = I.getType()->getAsCXXRecordDecl();
8530       // Ignore empty bases.
8531       if (Base->isEmpty() || CGF.getContext()
8532                                  .getASTRecordLayout(Base)
8533                                  .getNonVirtualSize()
8534                                  .isZero())
8535         continue;
8536 
8537       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8538       RecordLayout[FieldIndex] = Base;
8539     }
8540     // Fill in virtual bases.
8541     for (const auto &I : RD->vbases()) {
8542       const auto *Base = I.getType()->getAsCXXRecordDecl();
8543       // Ignore empty bases.
8544       if (Base->isEmpty())
8545         continue;
8546       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8547       if (RecordLayout[FieldIndex])
8548         continue;
8549       RecordLayout[FieldIndex] = Base;
8550     }
8551     // Fill in all the fields.
8552     assert(!RD->isUnion() && "Unexpected union.");
8553     for (const auto *Field : RD->fields()) {
8554       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8555       // will fill in later.)
8556       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8557         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8558         RecordLayout[FieldIndex] = Field;
8559       }
8560     }
8561     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8562              &Data : RecordLayout) {
8563       if (Data.isNull())
8564         continue;
8565       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8566         getPlainLayout(Base, Layout, /*AsBase=*/true);
8567       else
8568         Layout.push_back(Data.get<const FieldDecl *>());
8569     }
8570   }
8571 
8572   /// Generate all the base pointers, section pointers, sizes, map types, and
8573   /// mappers for the extracted mappable expressions (all included in \a
8574   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8575   /// pair of the relevant declaration and index where it occurs is appended to
8576   /// the device pointers info array.
8577   void generateAllInfoForClauses(
8578       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8579       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8580           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8581     // We have to process the component lists that relate with the same
8582     // declaration in a single chunk so that we can generate the map flags
8583     // correctly. Therefore, we organize all lists in a map.
8584     enum MapKind { Present, Allocs, Other, Total };
8585     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8586                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8587         Info;
8588 
8589     // Helper function to fill the information map for the different supported
8590     // clauses.
8591     auto &&InfoGen =
8592         [&Info, &SkipVarSet](
8593             const ValueDecl *D, MapKind Kind,
8594             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8595             OpenMPMapClauseKind MapType,
8596             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8597             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8598             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8599             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8600           if (SkipVarSet.contains(D))
8601             return;
8602           auto It = Info.find(D);
8603           if (It == Info.end())
8604             It = Info
8605                      .insert(std::make_pair(
8606                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8607                      .first;
8608           It->second[Kind].emplace_back(
8609               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8610               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8611         };
8612 
8613     for (const auto *Cl : Clauses) {
8614       const auto *C = dyn_cast<OMPMapClause>(Cl);
8615       if (!C)
8616         continue;
8617       MapKind Kind = Other;
8618       if (llvm::is_contained(C->getMapTypeModifiers(),
8619                              OMPC_MAP_MODIFIER_present))
8620         Kind = Present;
8621       else if (C->getMapType() == OMPC_MAP_alloc)
8622         Kind = Allocs;
8623       const auto *EI = C->getVarRefs().begin();
8624       for (const auto L : C->component_lists()) {
8625         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8626         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8627                 C->getMapTypeModifiers(), llvm::None,
8628                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8629                 E);
8630         ++EI;
8631       }
8632     }
8633     for (const auto *Cl : Clauses) {
8634       const auto *C = dyn_cast<OMPToClause>(Cl);
8635       if (!C)
8636         continue;
8637       MapKind Kind = Other;
8638       if (llvm::is_contained(C->getMotionModifiers(),
8639                              OMPC_MOTION_MODIFIER_present))
8640         Kind = Present;
8641       const auto *EI = C->getVarRefs().begin();
8642       for (const auto L : C->component_lists()) {
8643         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8644                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8645                 C->isImplicit(), std::get<2>(L), *EI);
8646         ++EI;
8647       }
8648     }
8649     for (const auto *Cl : Clauses) {
8650       const auto *C = dyn_cast<OMPFromClause>(Cl);
8651       if (!C)
8652         continue;
8653       MapKind Kind = Other;
8654       if (llvm::is_contained(C->getMotionModifiers(),
8655                              OMPC_MOTION_MODIFIER_present))
8656         Kind = Present;
8657       const auto *EI = C->getVarRefs().begin();
8658       for (const auto L : C->component_lists()) {
8659         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8660                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8661                 C->isImplicit(), std::get<2>(L), *EI);
8662         ++EI;
8663       }
8664     }
8665 
    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
8672     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8673                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8674         DeferredInfo;
8675     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8676 
8677     for (const auto *Cl : Clauses) {
8678       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8679       if (!C)
8680         continue;
8681       for (const auto L : C->component_lists()) {
8682         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8683             std::get<1>(L);
8684         assert(!Components.empty() &&
8685                "Not expecting empty list of components!");
8686         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8687         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8688         const Expr *IE = Components.back().getAssociatedExpression();
8689         // If the first component is a member expression, we have to look into
8690         // 'this', which maps to null in the map of map information. Otherwise
8691         // look directly for the information.
8692         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8693 
8694         // We potentially have map information for this declaration already.
8695         // Look for the first set of components that refer to it.
8696         if (It != Info.end()) {
8697           bool Found = false;
8698           for (auto &Data : It->second) {
8699             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8700               return MI.Components.back().getAssociatedDeclaration() == VD;
8701             });
8702             // If we found a map entry, signal that the pointer has to be
8703             // returned and move on to the next declaration. Exclude cases where
8704             // the base pointer is mapped as array subscript, array section or
8705             // array shaping. The base address is passed as a pointer to base in
8706             // this case and cannot be used as a base for use_device_ptr list
8707             // item.
8708             if (CI != Data.end()) {
8709               auto PrevCI = std::next(CI->Components.rbegin());
8710               const auto *VarD = dyn_cast<VarDecl>(VD);
8711               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8712                   isa<MemberExpr>(IE) ||
8713                   !VD->getType().getNonReferenceType()->isPointerType() ||
8714                   PrevCI == CI->Components.rend() ||
8715                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8716                   VarD->hasLocalStorage()) {
8717                 CI->ReturnDevicePointer = true;
8718                 Found = true;
8719                 break;
8720               }
8721             }
8722           }
8723           if (Found)
8724             continue;
8725         }
8726 
8727         // We didn't find any match in our map information - generate a zero
8728         // size array section - if the pointer is a struct member we defer this
8729         // action until the whole struct has been processed.
8730         if (isa<MemberExpr>(IE)) {
8731           // Insert the pointer into Info to be processed by
8732           // generateInfoForComponentList. Because it is a member pointer
8733           // without a pointee, no entry will be generated for it, therefore
8734           // we need to generate one after the whole struct has been processed.
8735           // Nonetheless, generateInfoForComponentList must be called to take
8736           // the pointer into account for the calculation of the range of the
8737           // partial struct.
8738           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8739                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8740                   nullptr);
8741           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8742         } else {
8743           llvm::Value *Ptr =
8744               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8745           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8746           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8747           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8748           UseDevicePtrCombinedInfo.Sizes.push_back(
8749               llvm::Constant::getNullValue(CGF.Int64Ty));
8750           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8751           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8752         }
8753       }
8754     }
8755 
8756     // Look at the use_device_addr clause information and mark the existing map
8757     // entries as such. If there is no map information for an entry in the
8758     // use_device_addr list, we create one with map type 'alloc' and zero size
8759     // section. It is the user fault if that was not mapped before. If there is
8760     // no map information and the pointer is a struct member, then we defer the
8761     // emission of that entry until the whole struct has been processed.
8762     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8763     for (const auto *Cl : Clauses) {
8764       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8765       if (!C)
8766         continue;
8767       for (const auto L : C->component_lists()) {
8768         assert(!std::get<1>(L).empty() &&
8769                "Not expecting empty list of components!");
8770         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8771         if (!Processed.insert(VD).second)
8772           continue;
8773         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8774         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8775         // If the first component is a member expression, we have to look into
8776         // 'this', which maps to null in the map of map information. Otherwise
8777         // look directly for the information.
8778         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8779 
8780         // We potentially have map information for this declaration already.
8781         // Look for the first set of components that refer to it.
8782         if (It != Info.end()) {
8783           bool Found = false;
8784           for (auto &Data : It->second) {
8785             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8786               return MI.Components.back().getAssociatedDeclaration() == VD;
8787             });
8788             // If we found a map entry, signal that the pointer has to be
8789             // returned and move on to the next declaration.
8790             if (CI != Data.end()) {
8791               CI->ReturnDevicePointer = true;
8792               Found = true;
8793               break;
8794             }
8795           }
8796           if (Found)
8797             continue;
8798         }
8799 
8800         // We didn't find any match in our map information - generate a zero
8801         // size array section - if the pointer is a struct member we defer this
8802         // action until the whole struct has been processed.
8803         if (isa<MemberExpr>(IE)) {
8804           // Insert the pointer into Info to be processed by
8805           // generateInfoForComponentList. Because it is a member pointer
8806           // without a pointee, no entry will be generated for it, therefore
8807           // we need to generate one after the whole struct has been processed.
8808           // Nonetheless, generateInfoForComponentList must be called to take
8809           // the pointer into account for the calculation of the range of the
8810           // partial struct.
8811           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8812                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8813                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8814           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8815         } else {
8816           llvm::Value *Ptr;
8817           if (IE->isGLValue())
8818             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8819           else
8820             Ptr = CGF.EmitScalarExpr(IE);
8821           CombinedInfo.Exprs.push_back(VD);
8822           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8823           CombinedInfo.Pointers.push_back(Ptr);
8824           CombinedInfo.Sizes.push_back(
8825               llvm::Constant::getNullValue(CGF.Int64Ty));
8826           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8827           CombinedInfo.Mappers.push_back(nullptr);
8828         }
8829       }
8830     }
8831 
8832     for (const auto &Data : Info) {
8833       StructRangeInfoTy PartialStruct;
8834       // Temporary generated information.
8835       MapCombinedInfoTy CurInfo;
8836       const Decl *D = Data.first;
8837       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8838       for (const auto &M : Data.second) {
8839         for (const MapInfo &L : M) {
8840           assert(!L.Components.empty() &&
8841                  "Not expecting declaration with no component lists.");
8842 
8843           // Remember the current base pointer index.
8844           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8845           CurInfo.NonContigInfo.IsNonContiguous =
8846               L.Components.back().isNonContiguous();
8847           generateInfoForComponentList(
8848               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8849               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8850               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8851 
8852           // If this entry relates with a device pointer, set the relevant
8853           // declaration and add the 'return pointer' flag.
8854           if (L.ReturnDevicePointer) {
8855             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8856                    "Unexpected number of mapped base pointers.");
8857 
8858             const ValueDecl *RelevantVD =
8859                 L.Components.back().getAssociatedDeclaration();
8860             assert(RelevantVD &&
8861                    "No relevant declaration related with device pointer??");
8862 
8863             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8864                 RelevantVD);
8865             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8866           }
8867         }
8868       }
8869 
8870       // Append any pending zero-length pointers which are struct members and
8871       // used with use_device_ptr or use_device_addr.
8872       auto CI = DeferredInfo.find(Data.first);
8873       if (CI != DeferredInfo.end()) {
8874         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8875           llvm::Value *BasePtr;
8876           llvm::Value *Ptr;
8877           if (L.ForDeviceAddr) {
8878             if (L.IE->isGLValue())
8879               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8880             else
8881               Ptr = this->CGF.EmitScalarExpr(L.IE);
8882             BasePtr = Ptr;
8883             // Entry is RETURN_PARAM. Also, set the placeholder value
8884             // MEMBER_OF=FFFF so that the entry is later updated with the
8885             // correct value of MEMBER_OF.
8886             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8887           } else {
8888             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8889             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8890                                              L.IE->getExprLoc());
8891             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8892             // placeholder value MEMBER_OF=FFFF so that the entry is later
8893             // updated with the correct value of MEMBER_OF.
8894             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8895                                     OMP_MAP_MEMBER_OF);
8896           }
8897           CurInfo.Exprs.push_back(L.VD);
8898           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8899           CurInfo.Pointers.push_back(Ptr);
8900           CurInfo.Sizes.push_back(
8901               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8902           CurInfo.Mappers.push_back(nullptr);
8903         }
8904       }
8905       // If there is an entry in PartialStruct it means we have a struct with
8906       // individual members mapped. Emit an extra combined entry.
8907       if (PartialStruct.Base.isValid()) {
8908         CurInfo.NonContigInfo.Dims.push_back(0);
8909         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8910       }
8911 
8912       // We need to append the results of this capture to what we already
8913       // have.
8914       CombinedInfo.append(CurInfo);
8915     }
8916     // Append data for use_device_ptr clauses.
8917     CombinedInfo.append(UseDevicePtrCombinedInfo);
8918   }
8919 
8920 public:
8921   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8922       : CurDir(&Dir), CGF(CGF) {
8923     // Extract firstprivate clause information.
8924     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8925       for (const auto *D : C->varlists())
8926         FirstPrivateDecls.try_emplace(
8927             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8928     // Extract implicit firstprivates from uses_allocators clauses.
8929     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8930       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8931         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8932         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8933           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8934                                         /*Implicit=*/true);
8935         else if (const auto *VD = dyn_cast<VarDecl>(
8936                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8937                          ->getDecl()))
8938           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8939       }
8940     }
8941     // Extract device pointer clause information.
8942     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8943       for (auto L : C->component_lists())
8944         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8945     // Extract map information.
8946     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8947       if (C->getMapType() != OMPC_MAP_to)
8948         continue;
8949       for (auto L : C->component_lists()) {
8950         const ValueDecl *VD = std::get<0>(L);
8951         const auto *RD = VD ? VD->getType()
8952                                   .getCanonicalType()
8953                                   .getNonReferenceType()
8954                                   ->getAsCXXRecordDecl()
8955                             : nullptr;
8956         if (RD && RD->isLambda())
8957           LambdasMap.try_emplace(std::get<0>(L), C);
8958       }
8959     }
8960   }
8961 
8962   /// Constructor for the declare mapper directive.
8963   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8964       : CurDir(&Dir), CGF(CGF) {}
8965 
8966   /// Generate code for the combined entry if we have a partially mapped struct
8967   /// and take care of the mapping flags of the arguments corresponding to
8968   /// individual struct members.
8969   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8970                          MapFlagsArrayTy &CurTypes,
8971                          const StructRangeInfoTy &PartialStruct,
8972                          const ValueDecl *VD = nullptr,
8973                          bool NotTargetParams = true) const {
8974     if (CurTypes.size() == 1 &&
8975         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8976         !PartialStruct.IsArraySection)
8977       return;
8978     Address LBAddr = PartialStruct.LowestElem.second;
8979     Address HBAddr = PartialStruct.HighestElem.second;
8980     if (PartialStruct.HasCompleteRecord) {
8981       LBAddr = PartialStruct.LB;
8982       HBAddr = PartialStruct.LB;
8983     }
8984     CombinedInfo.Exprs.push_back(VD);
8985     // Base is the base of the struct
8986     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8987     // Pointer is the address of the lowest element
8988     llvm::Value *LB = LBAddr.getPointer();
8989     CombinedInfo.Pointers.push_back(LB);
8990     // There should not be a mapper for a combined entry.
8991     CombinedInfo.Mappers.push_back(nullptr);
8992     // Size is (addr of {highest+1} element) - (addr of lowest element)
8993     llvm::Value *HB = HBAddr.getPointer();
8994     llvm::Value *HAddr =
8995         CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
8996     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8997     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8998     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8999     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
9000                                                   /*isSigned=*/false);
9001     CombinedInfo.Sizes.push_back(Size);
9002     // Map type is always TARGET_PARAM, if generate info for captures.
9003     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
9004                                                  : OMP_MAP_TARGET_PARAM);
9005     // If any element has the present modifier, then make sure the runtime
9006     // doesn't attempt to allocate the struct.
9007     if (CurTypes.end() !=
9008         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9009           return Type & OMP_MAP_PRESENT;
9010         }))
9011       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
9012     // Remove TARGET_PARAM flag from the first element
9013     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
9014     // If any element has the ompx_hold modifier, then make sure the runtime
9015     // uses the hold reference count for the struct as a whole so that it won't
9016     // be unmapped by an extra dynamic reference count decrement.  Add it to all
9017     // elements as well so the runtime knows which reference count to check
9018     // when determining whether it's time for device-to-host transfers of
9019     // individual elements.
9020     if (CurTypes.end() !=
9021         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9022           return Type & OMP_MAP_OMPX_HOLD;
9023         })) {
9024       CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
9025       for (auto &M : CurTypes)
9026         M |= OMP_MAP_OMPX_HOLD;
9027     }
9028 
9029     // All other current entries will be MEMBER_OF the combined entry
9030     // (except for PTR_AND_OBJ entries which do not have a placeholder value
9031     // 0xFFFF in the MEMBER_OF field).
9032     OpenMPOffloadMappingFlags MemberOfFlag =
9033         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
9034     for (auto &M : CurTypes)
9035       setCorrectMemberOfFlag(M, MemberOfFlag);
9036   }
9037 
9038   /// Generate all the base pointers, section pointers, sizes, map types, and
9039   /// mappers for the extracted mappable expressions (all included in \a
9040   /// CombinedInfo). Also, for each item that relates with a device pointer, a
9041   /// pair of the relevant declaration and index where it occurs is appended to
9042   /// the device pointers info array.
9043   void generateAllInfo(
9044       MapCombinedInfoTy &CombinedInfo,
9045       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9046           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9047     assert(CurDir.is<const OMPExecutableDirective *>() &&
9048            "Expect a executable directive");
9049     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9050     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9051   }
9052 
9053   /// Generate all the base pointers, section pointers, sizes, map types, and
9054   /// mappers for the extracted map clauses of user-defined mapper (all included
9055   /// in \a CombinedInfo).
9056   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9057     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9058            "Expect a declare mapper directive");
9059     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9060     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9061   }
9062 
9063   /// Emit capture info for lambdas for variables captured by reference.
9064   void generateInfoForLambdaCaptures(
9065       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9066       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9067     const auto *RD = VD->getType()
9068                          .getCanonicalType()
9069                          .getNonReferenceType()
9070                          ->getAsCXXRecordDecl();
9071     if (!RD || !RD->isLambda())
9072       return;
9073     Address VDAddr =
9074         Address::deprecated(Arg, CGF.getContext().getDeclAlign(VD));
9075     LValue VDLVal = CGF.MakeAddrLValue(
9076         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
9077     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
9078     FieldDecl *ThisCapture = nullptr;
9079     RD->getCaptureFields(Captures, ThisCapture);
9080     if (ThisCapture) {
9081       LValue ThisLVal =
9082           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9083       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9084       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9085                                  VDLVal.getPointer(CGF));
9086       CombinedInfo.Exprs.push_back(VD);
9087       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9088       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9089       CombinedInfo.Sizes.push_back(
9090           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9091                                     CGF.Int64Ty, /*isSigned=*/true));
9092       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9093                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9094       CombinedInfo.Mappers.push_back(nullptr);
9095     }
9096     for (const LambdaCapture &LC : RD->captures()) {
9097       if (!LC.capturesVariable())
9098         continue;
9099       const VarDecl *VD = LC.getCapturedVar();
9100       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9101         continue;
9102       auto It = Captures.find(VD);
9103       assert(It != Captures.end() && "Found lambda capture without field.");
9104       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9105       if (LC.getCaptureKind() == LCK_ByRef) {
9106         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9107         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9108                                    VDLVal.getPointer(CGF));
9109         CombinedInfo.Exprs.push_back(VD);
9110         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9111         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9112         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9113             CGF.getTypeSize(
9114                 VD->getType().getCanonicalType().getNonReferenceType()),
9115             CGF.Int64Ty, /*isSigned=*/true));
9116       } else {
9117         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9118         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9119                                    VDLVal.getPointer(CGF));
9120         CombinedInfo.Exprs.push_back(VD);
9121         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9122         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9123         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9124       }
9125       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9126                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9127       CombinedInfo.Mappers.push_back(nullptr);
9128     }
9129   }
9130 
9131   /// Set correct indices for lambdas captures.
9132   void adjustMemberOfForLambdaCaptures(
9133       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9134       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9135       MapFlagsArrayTy &Types) const {
9136     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9137       // Set correct member_of idx for all implicit lambda captures.
9138       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9139                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9140         continue;
9141       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9142       assert(BasePtr && "Unable to find base lambda address.");
9143       int TgtIdx = -1;
9144       for (unsigned J = I; J > 0; --J) {
9145         unsigned Idx = J - 1;
9146         if (Pointers[Idx] != BasePtr)
9147           continue;
9148         TgtIdx = Idx;
9149         break;
9150       }
9151       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9152       // All other current entries will be MEMBER_OF the combined entry
9153       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9154       // 0xFFFF in the MEMBER_OF field).
9155       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9156       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9157     }
9158   }
9159 
9160   /// Generate the base pointers, section pointers, sizes, map types, and
9161   /// mappers associated to a given capture (all included in \a CombinedInfo).
9162   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9163                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9164                               StructRangeInfoTy &PartialStruct) const {
9165     assert(!Cap->capturesVariableArrayType() &&
9166            "Not expecting to generate map info for a variable array type!");
9167 
9168     // We need to know when we generating information for the first component
9169     const ValueDecl *VD = Cap->capturesThis()
9170                               ? nullptr
9171                               : Cap->getCapturedVar()->getCanonicalDecl();
9172 
9173     // for map(to: lambda): skip here, processing it in
9174     // generateDefaultMapInfo
9175     if (LambdasMap.count(VD))
9176       return;
9177 
9178     // If this declaration appears in a is_device_ptr clause we just have to
9179     // pass the pointer by value. If it is a reference to a declaration, we just
9180     // pass its value.
9181     if (DevPointersMap.count(VD)) {
9182       CombinedInfo.Exprs.push_back(VD);
9183       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9184       CombinedInfo.Pointers.push_back(Arg);
9185       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9186           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9187           /*isSigned=*/true));
9188       CombinedInfo.Types.push_back(
9189           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9190           OMP_MAP_TARGET_PARAM);
9191       CombinedInfo.Mappers.push_back(nullptr);
9192       return;
9193     }
9194 
9195     using MapData =
9196         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9197                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9198                    const ValueDecl *, const Expr *>;
9199     SmallVector<MapData, 4> DeclComponentLists;
9200     assert(CurDir.is<const OMPExecutableDirective *>() &&
9201            "Expect a executable directive");
9202     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9203     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9204       const auto *EI = C->getVarRefs().begin();
9205       for (const auto L : C->decl_component_lists(VD)) {
9206         const ValueDecl *VDecl, *Mapper;
9207         // The Expression is not correct if the mapping is implicit
9208         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9209         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9210         std::tie(VDecl, Components, Mapper) = L;
9211         assert(VDecl == VD && "We got information for the wrong declaration??");
9212         assert(!Components.empty() &&
9213                "Not expecting declaration with no component lists.");
9214         DeclComponentLists.emplace_back(Components, C->getMapType(),
9215                                         C->getMapTypeModifiers(),
9216                                         C->isImplicit(), Mapper, E);
9217         ++EI;
9218       }
9219     }
9220     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9221                                              const MapData &RHS) {
9222       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9223       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9224       bool HasPresent =
9225           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9226       bool HasAllocs = MapType == OMPC_MAP_alloc;
9227       MapModifiers = std::get<2>(RHS);
9228       MapType = std::get<1>(LHS);
9229       bool HasPresentR =
9230           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9231       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9232       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9233     });
9234 
9235     // Find overlapping elements (including the offset from the base element).
9236     llvm::SmallDenseMap<
9237         const MapData *,
9238         llvm::SmallVector<
9239             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9240         4>
9241         OverlappedData;
9242     size_t Count = 0;
9243     for (const MapData &L : DeclComponentLists) {
9244       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9245       OpenMPMapClauseKind MapType;
9246       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9247       bool IsImplicit;
9248       const ValueDecl *Mapper;
9249       const Expr *VarRef;
9250       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9251           L;
9252       ++Count;
9253       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9254         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9255         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9256                  VarRef) = L1;
9257         auto CI = Components.rbegin();
9258         auto CE = Components.rend();
9259         auto SI = Components1.rbegin();
9260         auto SE = Components1.rend();
9261         for (; CI != CE && SI != SE; ++CI, ++SI) {
9262           if (CI->getAssociatedExpression()->getStmtClass() !=
9263               SI->getAssociatedExpression()->getStmtClass())
9264             break;
9265           // Are we dealing with different variables/fields?
9266           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9267             break;
9268         }
9269         // Found overlapping if, at least for one component, reached the head
9270         // of the components list.
9271         if (CI == CE || SI == SE) {
9272           // Ignore it if it is the same component.
9273           if (CI == CE && SI == SE)
9274             continue;
9275           const auto It = (SI == SE) ? CI : SI;
9276           // If one component is a pointer and another one is a kind of
9277           // dereference of this pointer (array subscript, section, dereference,
9278           // etc.), it is not an overlapping.
9279           // Same, if one component is a base and another component is a
9280           // dereferenced pointer memberexpr with the same base.
9281           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9282               (std::prev(It)->getAssociatedDeclaration() &&
9283                std::prev(It)
9284                    ->getAssociatedDeclaration()
9285                    ->getType()
9286                    ->isPointerType()) ||
9287               (It->getAssociatedDeclaration() &&
9288                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9289                std::next(It) != CE && std::next(It) != SE))
9290             continue;
9291           const MapData &BaseData = CI == CE ? L : L1;
9292           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9293               SI == SE ? Components : Components1;
9294           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9295           OverlappedElements.getSecond().push_back(SubData);
9296         }
9297       }
9298     }
9299     // Sort the overlapped elements for each item.
9300     llvm::SmallVector<const FieldDecl *, 4> Layout;
9301     if (!OverlappedData.empty()) {
9302       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9303       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9304       while (BaseType != OrigType) {
9305         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9306         OrigType = BaseType->getPointeeOrArrayElementType();
9307       }
9308 
9309       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9310         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9311       else {
9312         const auto *RD = BaseType->getAsRecordDecl();
9313         Layout.append(RD->field_begin(), RD->field_end());
9314       }
9315     }
9316     for (auto &Pair : OverlappedData) {
9317       llvm::stable_sort(
9318           Pair.getSecond(),
9319           [&Layout](
9320               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9321               OMPClauseMappableExprCommon::MappableExprComponentListRef
9322                   Second) {
9323             auto CI = First.rbegin();
9324             auto CE = First.rend();
9325             auto SI = Second.rbegin();
9326             auto SE = Second.rend();
9327             for (; CI != CE && SI != SE; ++CI, ++SI) {
9328               if (CI->getAssociatedExpression()->getStmtClass() !=
9329                   SI->getAssociatedExpression()->getStmtClass())
9330                 break;
9331               // Are we dealing with different variables/fields?
9332               if (CI->getAssociatedDeclaration() !=
9333                   SI->getAssociatedDeclaration())
9334                 break;
9335             }
9336 
9337             // Lists contain the same elements.
9338             if (CI == CE && SI == SE)
9339               return false;
9340 
9341             // List with less elements is less than list with more elements.
9342             if (CI == CE || SI == SE)
9343               return CI == CE;
9344 
9345             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9346             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9347             if (FD1->getParent() == FD2->getParent())
9348               return FD1->getFieldIndex() < FD2->getFieldIndex();
9349             const auto *It =
9350                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9351                   return FD == FD1 || FD == FD2;
9352                 });
9353             return *It == FD1;
9354           });
9355     }
9356 
9357     // Associated with a capture, because the mapping flags depend on it.
9358     // Go through all of the elements with the overlapped elements.
9359     bool IsFirstComponentList = true;
9360     for (const auto &Pair : OverlappedData) {
9361       const MapData &L = *Pair.getFirst();
9362       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9363       OpenMPMapClauseKind MapType;
9364       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9365       bool IsImplicit;
9366       const ValueDecl *Mapper;
9367       const Expr *VarRef;
9368       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9369           L;
9370       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9371           OverlappedComponents = Pair.getSecond();
9372       generateInfoForComponentList(
9373           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9374           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9375           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9376       IsFirstComponentList = false;
9377     }
9378     // Go through other elements without overlapped elements.
9379     for (const MapData &L : DeclComponentLists) {
9380       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9381       OpenMPMapClauseKind MapType;
9382       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9383       bool IsImplicit;
9384       const ValueDecl *Mapper;
9385       const Expr *VarRef;
9386       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9387           L;
9388       auto It = OverlappedData.find(&L);
9389       if (It == OverlappedData.end())
9390         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9391                                      Components, CombinedInfo, PartialStruct,
9392                                      IsFirstComponentList, IsImplicit, Mapper,
9393                                      /*ForDeviceAddr=*/false, VD, VarRef);
9394       IsFirstComponentList = false;
9395     }
9396   }
9397 
9398   /// Generate the default map information for a given capture \a CI,
9399   /// record field declaration \a RI and captured value \a CV.
9400   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9401                               const FieldDecl &RI, llvm::Value *CV,
9402                               MapCombinedInfoTy &CombinedInfo) const {
9403     bool IsImplicit = true;
9404     // Do the default mapping.
9405     if (CI.capturesThis()) {
9406       CombinedInfo.Exprs.push_back(nullptr);
9407       CombinedInfo.BasePointers.push_back(CV);
9408       CombinedInfo.Pointers.push_back(CV);
9409       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9410       CombinedInfo.Sizes.push_back(
9411           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9412                                     CGF.Int64Ty, /*isSigned=*/true));
9413       // Default map type.
9414       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9415     } else if (CI.capturesVariableByCopy()) {
9416       const VarDecl *VD = CI.getCapturedVar();
9417       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9418       CombinedInfo.BasePointers.push_back(CV);
9419       CombinedInfo.Pointers.push_back(CV);
9420       if (!RI.getType()->isAnyPointerType()) {
9421         // We have to signal to the runtime captures passed by value that are
9422         // not pointers.
9423         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9424         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9425             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9426       } else {
9427         // Pointers are implicitly mapped with a zero size and no flags
9428         // (other than first map that is added for all implicit maps).
9429         CombinedInfo.Types.push_back(OMP_MAP_NONE);
9430         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9431       }
9432       auto I = FirstPrivateDecls.find(VD);
9433       if (I != FirstPrivateDecls.end())
9434         IsImplicit = I->getSecond();
9435     } else {
9436       assert(CI.capturesVariable() && "Expected captured reference.");
9437       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9438       QualType ElementType = PtrTy->getPointeeType();
9439       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9440           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9441       // The default map type for a scalar/complex type is 'to' because by
9442       // default the value doesn't have to be retrieved. For an aggregate
9443       // type, the default is 'tofrom'.
9444       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9445       const VarDecl *VD = CI.getCapturedVar();
9446       auto I = FirstPrivateDecls.find(VD);
9447       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9448       CombinedInfo.BasePointers.push_back(CV);
9449       if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9450         Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9451             CV, ElementType, CGF.getContext().getDeclAlign(VD),
9452             AlignmentSource::Decl));
9453         CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9454       } else {
9455         CombinedInfo.Pointers.push_back(CV);
9456       }
9457       if (I != FirstPrivateDecls.end())
9458         IsImplicit = I->getSecond();
9459     }
9460     // Every default map produces a single argument which is a target parameter.
9461     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9462 
9463     // Add flag stating this is an implicit map.
9464     if (IsImplicit)
9465       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9466 
9467     // No user-defined mapper for default mapping.
9468     CombinedInfo.Mappers.push_back(nullptr);
9469   }
9470 };
9471 } // anonymous namespace
9472 
9473 static void emitNonContiguousDescriptor(
9474     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9475     CGOpenMPRuntime::TargetDataInfo &Info) {
9476   CodeGenModule &CGM = CGF.CGM;
9477   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9478       &NonContigInfo = CombinedInfo.NonContigInfo;
9479 
9480   // Build an array of struct descriptor_dim and then assign it to
9481   // offload_args.
9482   //
9483   // struct descriptor_dim {
9484   //  uint64_t offset;
9485   //  uint64_t count;
9486   //  uint64_t stride
9487   // };
9488   ASTContext &C = CGF.getContext();
9489   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9490   RecordDecl *RD;
9491   RD = C.buildImplicitRecord("descriptor_dim");
9492   RD->startDefinition();
9493   addFieldToRecordDecl(C, RD, Int64Ty);
9494   addFieldToRecordDecl(C, RD, Int64Ty);
9495   addFieldToRecordDecl(C, RD, Int64Ty);
9496   RD->completeDefinition();
9497   QualType DimTy = C.getRecordType(RD);
9498 
9499   enum { OffsetFD = 0, CountFD, StrideFD };
9500   // We need two index variable here since the size of "Dims" is the same as the
9501   // size of Components, however, the size of offset, count, and stride is equal
9502   // to the size of base declaration that is non-contiguous.
9503   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9504     // Skip emitting ir if dimension size is 1 since it cannot be
9505     // non-contiguous.
9506     if (NonContigInfo.Dims[I] == 1)
9507       continue;
9508     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9509     QualType ArrayTy =
9510         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9511     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9512     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9513       unsigned RevIdx = EE - II - 1;
9514       LValue DimsLVal = CGF.MakeAddrLValue(
9515           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9516       // Offset
9517       LValue OffsetLVal = CGF.EmitLValueForField(
9518           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9519       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9520       // Count
9521       LValue CountLVal = CGF.EmitLValueForField(
9522           DimsLVal, *std::next(RD->field_begin(), CountFD));
9523       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9524       // Stride
9525       LValue StrideLVal = CGF.EmitLValueForField(
9526           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9527       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9528     }
9529     // args[I] = &dims
9530     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9531         DimsAddr, CGM.Int8PtrTy);
9532     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9533         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9534         Info.PointersArray, 0, I);
9535     Address PAddr = Address::deprecated(P, CGF.getPointerAlign());
9536     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9537     ++L;
9538   }
9539 }
9540 
9541 // Try to extract the base declaration from a `this->x` expression if possible.
9542 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9543   if (!E)
9544     return nullptr;
9545 
9546   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9547     if (const MemberExpr *ME =
9548             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9549       return ME->getMemberDecl();
9550   return nullptr;
9551 }
9552 
9553 /// Emit a string constant containing the names of the values mapped to the
9554 /// offloading runtime library.
9555 llvm::Constant *
9556 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9557                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9558 
9559   uint32_t SrcLocStrSize;
9560   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9561     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9562 
9563   SourceLocation Loc;
9564   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9565     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9566       Loc = VD->getLocation();
9567     else
9568       Loc = MapExprs.getMapExpr()->getExprLoc();
9569   } else {
9570     Loc = MapExprs.getMapDecl()->getLocation();
9571   }
9572 
9573   std::string ExprName;
9574   if (MapExprs.getMapExpr()) {
9575     PrintingPolicy P(CGF.getContext().getLangOpts());
9576     llvm::raw_string_ostream OS(ExprName);
9577     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9578     OS.flush();
9579   } else {
9580     ExprName = MapExprs.getMapDecl()->getNameAsString();
9581   }
9582 
9583   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9584   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9585                                          PLoc.getLine(), PLoc.getColumn(),
9586                                          SrcLocStrSize);
9587 }
9588 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library and record them in \p Info.
///
/// \p Info is always reset first. If \p CombinedInfo holds no base pointers,
/// nothing else is emitted and the array fields of \p Info keep whatever
/// state Info.clearArrayInfo() leaves them in.
///
/// \param CGF Function in which the temporaries and stores are emitted.
/// \param CombinedInfo Base pointers, pointers, sizes, map types, mappers and
/// mapped expressions, one element per map entry.
/// \param Info Receives the emitted arrays and the number of entries.
/// \param OMPBuilder Used to create the constant map-type and map-name
/// arrays.
/// \param IsNonContiguous When true, entries flagged OMP_MAP_NON_CONTIG use
/// their dimension count as the size, and the non-contiguous descriptors are
/// emitted at the end if any offsets were collected.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Array-of-void* type with one element per map entry; shared by the base
    // pointer, pointer and mapper arrays.

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    //
    // ConstSizes holds the compile-time sizes (zero for the entries that are
    // only known at run time); RuntimeSizes marks the run-time entries.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    SmallVector<llvm::Constant *> ConstSizes(
        CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
    llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
    for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
      if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
        // Constant expressions and global values are still treated as
        // run-time sizes; only the remaining constants are taken as-is.
        if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
          // For non-contiguous entries the size slot carries the number of
          // dimensions instead of the byte size.
          if (IsNonContiguous && (CombinedInfo.Types[I] &
                                  MappableExprsHandler::OMP_MAP_NON_CONTIG))
            ConstSizes[I] = llvm::ConstantInt::get(
                CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
          else
            ConstSizes[I] = CI;
          continue;
        }
      }
      RuntimeSizes.set(I);
    }

    if (RuntimeSizes.all()) {
      // Every size is computed at run time: only a temporary is needed; it is
      // filled in by the per-entry loop below.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // At least one size is constant: emit a private constant global with
      // the known sizes.
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      if (RuntimeSizes.any()) {
        // Mixed case: copy the constant global into a temporary so that the
        // run-time entries can be overwritten by the per-entry loop below.
        QualType SizeArrayType = Ctx.getConstantArrayType(
            Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
            /*IndexTypeQuals=*/0);
        Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
        llvm::Value *GblConstPtr =
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                SizesArrayGbl, CGM.Int64Ty->getPointerTo());
        CGF.Builder.CreateMemCpy(
            Buffer,
            Address(GblConstPtr, CGM.Int64Ty,
                    CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
                        /*DestWidth=*/64, /*Signed=*/false))),
            CGF.getTypeSize(SizeArrayType));
        Info.SizesArray = Buffer.getPointer();
      } else {
        // All sizes are constant: the global is used directly.
        Info.SizesArray = SizesArrayGbl;
      }
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The map name strings are only built if debug information is requested;
    // otherwise a null i8* is recorded.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // One source-location string per mapped expression.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      // Mapping is modified in place; it has already been used to create the
      // begin-of-region array above.
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      // Only emit the second array when it would differ from the first one.
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the I-th slot of the base pointer, pointer, size (run-time entries
    // only) and mapper arrays.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      // Base pointer: the slot is cast to a pointer to the value's own type
      // so the value can be stored without converting it.
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr =
          Address::deprecated(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember the slot address for base pointers that carry a device
      // pointer declaration, when the caller asked for that information.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      // Pointer: stored the same way as the base pointer.
      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr =
          Address::deprecated(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Size: constant entries are already in place, only run-time sizes
      // need a store.
      if (RuntimeSizes.test(I)) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr =
            Address::deprecated(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array: a null pointer unless the entry has a
      // user-defined mapper, in which case its function is stored and
      // Info.HasMapper is set.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // Non-contiguous descriptors are only needed when requested, when offsets
  // were collected, and when there is at least one entry.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9781 
namespace {
/// Optional settings for emitOffloadingArraysArgument.
struct ArgumentsOptions {
  /// True when the arguments are being emitted for the call that ends a
  /// region rather than the one that begins it.
  bool ForEndCall;

  /// Intentionally implicit so that a plain bool (or a braced bool) converts
  /// to the options object; omitting the argument selects the begin call.
  ArgumentsOptions(bool ForEndCall = false) : ForEndCall(ForEndCall) {}
};
} // namespace
9790 
9791 /// Emit the arguments to be passed to the runtime library based on the
9792 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9793 /// ForEndCall, emit map types to be passed for the end of the region instead of
9794 /// the beginning.
9795 static void emitOffloadingArraysArgument(
9796     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9797     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9798     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9799     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9800     const ArgumentsOptions &Options = ArgumentsOptions()) {
9801   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9802          "expected region end call to runtime only when end call is separate");
9803   CodeGenModule &CGM = CGF.CGM;
9804   if (Info.NumberOfPtrs) {
9805     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9806         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9807         Info.BasePointersArray,
9808         /*Idx0=*/0, /*Idx1=*/0);
9809     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9810         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9811         Info.PointersArray,
9812         /*Idx0=*/0,
9813         /*Idx1=*/0);
9814     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9815         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9816         /*Idx0=*/0, /*Idx1=*/0);
9817     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9818         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9819         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9820                                                     : Info.MapTypesArray,
9821         /*Idx0=*/0,
9822         /*Idx1=*/0);
9823 
9824     // Only emit the mapper information arrays if debug information is
9825     // requested.
9826     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9827       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9828     else
9829       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9830           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9831           Info.MapNamesArray,
9832           /*Idx0=*/0,
9833           /*Idx1=*/0);
9834     // If there is no user-defined mapper, set the mapper array to nullptr to
9835     // avoid an unnecessary data privatization
9836     if (!Info.HasMapper)
9837       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9838     else
9839       MappersArrayArg =
9840           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9841   } else {
9842     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9843     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9844     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9845     MapTypesArrayArg =
9846         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9847     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9848     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9849   }
9850 }
9851 
9852 /// Check for inner distribute directive.
9853 static const OMPExecutableDirective *
9854 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9855   const auto *CS = D.getInnermostCapturedStmt();
9856   const auto *Body =
9857       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9858   const Stmt *ChildStmt =
9859       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9860 
9861   if (const auto *NestedDir =
9862           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9863     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9864     switch (D.getDirectiveKind()) {
9865     case OMPD_target:
9866       if (isOpenMPDistributeDirective(DKind))
9867         return NestedDir;
9868       if (DKind == OMPD_teams) {
9869         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9870             /*IgnoreCaptured=*/true);
9871         if (!Body)
9872           return nullptr;
9873         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9874         if (const auto *NND =
9875                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9876           DKind = NND->getDirectiveKind();
9877           if (isOpenMPDistributeDirective(DKind))
9878             return NND;
9879         }
9880       }
9881       return nullptr;
9882     case OMPD_target_teams:
9883       if (isOpenMPDistributeDirective(DKind))
9884         return NestedDir;
9885       return nullptr;
9886     case OMPD_target_parallel:
9887     case OMPD_target_simd:
9888     case OMPD_target_parallel_for:
9889     case OMPD_target_parallel_for_simd:
9890       return nullptr;
9891     case OMPD_target_teams_distribute:
9892     case OMPD_target_teams_distribute_simd:
9893     case OMPD_target_teams_distribute_parallel_for:
9894     case OMPD_target_teams_distribute_parallel_for_simd:
9895     case OMPD_parallel:
9896     case OMPD_for:
9897     case OMPD_parallel_for:
9898     case OMPD_parallel_master:
9899     case OMPD_parallel_sections:
9900     case OMPD_for_simd:
9901     case OMPD_parallel_for_simd:
9902     case OMPD_cancel:
9903     case OMPD_cancellation_point:
9904     case OMPD_ordered:
9905     case OMPD_threadprivate:
9906     case OMPD_allocate:
9907     case OMPD_task:
9908     case OMPD_simd:
9909     case OMPD_tile:
9910     case OMPD_unroll:
9911     case OMPD_sections:
9912     case OMPD_section:
9913     case OMPD_single:
9914     case OMPD_master:
9915     case OMPD_critical:
9916     case OMPD_taskyield:
9917     case OMPD_barrier:
9918     case OMPD_taskwait:
9919     case OMPD_taskgroup:
9920     case OMPD_atomic:
9921     case OMPD_flush:
9922     case OMPD_depobj:
9923     case OMPD_scan:
9924     case OMPD_teams:
9925     case OMPD_target_data:
9926     case OMPD_target_exit_data:
9927     case OMPD_target_enter_data:
9928     case OMPD_distribute:
9929     case OMPD_distribute_simd:
9930     case OMPD_distribute_parallel_for:
9931     case OMPD_distribute_parallel_for_simd:
9932     case OMPD_teams_distribute:
9933     case OMPD_teams_distribute_simd:
9934     case OMPD_teams_distribute_parallel_for:
9935     case OMPD_teams_distribute_parallel_for_simd:
9936     case OMPD_target_update:
9937     case OMPD_declare_simd:
9938     case OMPD_declare_variant:
9939     case OMPD_begin_declare_variant:
9940     case OMPD_end_declare_variant:
9941     case OMPD_declare_target:
9942     case OMPD_end_declare_target:
9943     case OMPD_declare_reduction:
9944     case OMPD_declare_mapper:
9945     case OMPD_taskloop:
9946     case OMPD_taskloop_simd:
9947     case OMPD_master_taskloop:
9948     case OMPD_master_taskloop_simd:
9949     case OMPD_parallel_master_taskloop:
9950     case OMPD_parallel_master_taskloop_simd:
9951     case OMPD_requires:
9952     case OMPD_metadirective:
9953     case OMPD_unknown:
9954     default:
9955       llvm_unreachable("Unexpected directive.");
9956     }
9957   }
9958 
9959   return nullptr;
9960 }
9961 
9962 /// Emit the user-defined mapper function. The code generation follows the
9963 /// pattern in the example below.
9964 /// \code
9965 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9966 ///                                           void *base, void *begin,
9967 ///                                           int64_t size, int64_t type,
9968 ///                                           void *name = nullptr) {
9969 ///   // Allocate space for an array section first or add a base/begin for
9970 ///   // pointer dereference.
9971 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9972 ///       !maptype.IsDelete)
9973 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9974 ///                                 size*sizeof(Ty), clearToFromMember(type));
9975 ///   // Map members.
9976 ///   for (unsigned i = 0; i < size; i++) {
9977 ///     // For each component specified by this mapper:
9978 ///     for (auto c : begin[i]->all_components) {
9979 ///       if (c.hasMapper())
9980 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9981 ///                       c.arg_type, c.arg_name);
9982 ///       else
9983 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9984 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9985 ///                                     c.arg_name);
9986 ///     }
9987 ///   }
9988 ///   // Delete the array section.
9989 ///   if (size > 1 && maptype.IsDelete)
9990 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9991 ///                                 size*sizeof(Ty), clearToFromMember(type));
9992 /// }
9993 /// \endcode
9994 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9995                                             CodeGenFunction *CGF) {
9996   if (UDMMap.count(D) > 0)
9997     return;
9998   ASTContext &C = CGM.getContext();
9999   QualType Ty = D->getType();
10000   QualType PtrTy = C.getPointerType(Ty).withRestrict();
10001   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
10002   auto *MapperVarDecl =
10003       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
10004   SourceLocation Loc = D->getLocation();
10005   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
10006 
10007   // Prepare mapper function arguments and attributes.
10008   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
10009                               C.VoidPtrTy, ImplicitParamDecl::Other);
10010   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
10011                             ImplicitParamDecl::Other);
10012   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
10013                              C.VoidPtrTy, ImplicitParamDecl::Other);
10014   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
10015                             ImplicitParamDecl::Other);
10016   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
10017                             ImplicitParamDecl::Other);
10018   ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
10019                             ImplicitParamDecl::Other);
10020   FunctionArgList Args;
10021   Args.push_back(&HandleArg);
10022   Args.push_back(&BaseArg);
10023   Args.push_back(&BeginArg);
10024   Args.push_back(&SizeArg);
10025   Args.push_back(&TypeArg);
10026   Args.push_back(&NameArg);
10027   const CGFunctionInfo &FnInfo =
10028       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
10029   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
10030   SmallString<64> TyStr;
10031   llvm::raw_svector_ostream Out(TyStr);
10032   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
10033   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
10034   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
10035                                     Name, &CGM.getModule());
10036   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
10037   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
10038   // Start the mapper function code generation.
10039   CodeGenFunction MapperCGF(CGM);
10040   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
10041   // Compute the starting and end addresses of array elements.
10042   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
10043       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
10044       C.getPointerType(Int64Ty), Loc);
10045   // Prepare common arguments for array initiation and deletion.
10046   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
10047       MapperCGF.GetAddrOfLocalVar(&HandleArg),
10048       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
10049   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
10050       MapperCGF.GetAddrOfLocalVar(&BaseArg),
10051       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
10052   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
10053       MapperCGF.GetAddrOfLocalVar(&BeginArg),
10054       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
10055   // Convert the size in bytes into the number of array elements.
10056   Size = MapperCGF.Builder.CreateExactUDiv(
10057       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
10058   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
10059       BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
10060   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
10061       PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
10062   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
10063       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
10064       C.getPointerType(Int64Ty), Loc);
10065   llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
10066       MapperCGF.GetAddrOfLocalVar(&NameArg),
10067       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
10068 
10069   // Emit array initiation if this is an array section and \p MapType indicates
10070   // that memory allocation is required.
10071   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
10072   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
10073                              MapName, ElementSize, HeadBB, /*IsInit=*/true);
10074 
10075   // Emit a for loop to iterate through SizeArg of elements and map all of them.
10076 
10077   // Emit the loop header block.
10078   MapperCGF.EmitBlock(HeadBB);
10079   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
10080   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
10081   // Evaluate whether the initial condition is satisfied.
10082   llvm::Value *IsEmpty =
10083       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
10084   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
10085   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
10086 
10087   // Emit the loop body block.
10088   MapperCGF.EmitBlock(BodyBB);
10089   llvm::BasicBlock *LastBB = BodyBB;
10090   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
10091       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
10092   PtrPHI->addIncoming(PtrBegin, EntryBB);
10093   Address PtrCurrent =
10094       Address::deprecated(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
10095                                       .getAlignment()
10096                                       .alignmentOfArrayElement(ElementSize));
10097   // Privatize the declared variable of mapper to be the current array element.
10098   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
10099   Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
10100   (void)Scope.Privatize();
10101 
10102   // Get map clause information. Fill up the arrays with all mapped variables.
10103   MappableExprsHandler::MapCombinedInfoTy Info;
10104   MappableExprsHandler MEHandler(*D, MapperCGF);
10105   MEHandler.generateAllInfoForMapper(Info);
10106 
10107   // Call the runtime API __tgt_mapper_num_components to get the number of
10108   // pre-existing components.
10109   llvm::Value *OffloadingArgs[] = {Handle};
10110   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
10111       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10112                                             OMPRTL___tgt_mapper_num_components),
10113       OffloadingArgs);
10114   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
10115       PreviousSize,
10116       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
10117 
10118   // Fill up the runtime mapper handle for all components.
10119   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
10120     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
10121         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
10122     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
10123         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
10124     llvm::Value *CurSizeArg = Info.Sizes[I];
10125     llvm::Value *CurNameArg =
10126         (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
10127             ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
10128             : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
10129 
10130     // Extract the MEMBER_OF field from the map type.
10131     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
10132     llvm::Value *MemberMapType =
10133         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
10134 
10135     // Combine the map type inherited from user-defined mapper with that
10136     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
10137     // bits of the \a MapType, which is the input argument of the mapper
10138     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
10139     // bits of MemberMapType.
10140     // [OpenMP 5.0], 1.2.6. map-type decay.
10141     //        | alloc |  to   | from  | tofrom | release | delete
10142     // ----------------------------------------------------------
10143     // alloc  | alloc | alloc | alloc | alloc  | release | delete
10144     // to     | alloc |  to   | alloc |   to   | release | delete
10145     // from   | alloc | alloc | from  |  from  | release | delete
10146     // tofrom | alloc |  to   | from  | tofrom | release | delete
10147     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
10148         MapType,
10149         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
10150                                    MappableExprsHandler::OMP_MAP_FROM));
10151     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
10152     llvm::BasicBlock *AllocElseBB =
10153         MapperCGF.createBasicBlock("omp.type.alloc.else");
10154     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
10155     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
10156     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
10157     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
10158     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
10159     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
10160     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
10161     MapperCGF.EmitBlock(AllocBB);
10162     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
10163         MemberMapType,
10164         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10165                                      MappableExprsHandler::OMP_MAP_FROM)));
10166     MapperCGF.Builder.CreateBr(EndBB);
10167     MapperCGF.EmitBlock(AllocElseBB);
10168     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
10169         LeftToFrom,
10170         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
10171     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
10172     // In case of to, clear OMP_MAP_FROM.
10173     MapperCGF.EmitBlock(ToBB);
10174     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
10175         MemberMapType,
10176         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
10177     MapperCGF.Builder.CreateBr(EndBB);
10178     MapperCGF.EmitBlock(ToElseBB);
10179     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
10180         LeftToFrom,
10181         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
10182     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
10183     // In case of from, clear OMP_MAP_TO.
10184     MapperCGF.EmitBlock(FromBB);
10185     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
10186         MemberMapType,
10187         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
10188     // In case of tofrom, do nothing.
10189     MapperCGF.EmitBlock(EndBB);
10190     LastBB = EndBB;
10191     llvm::PHINode *CurMapType =
10192         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
10193     CurMapType->addIncoming(AllocMapType, AllocBB);
10194     CurMapType->addIncoming(ToMapType, ToBB);
10195     CurMapType->addIncoming(FromMapType, FromBB);
10196     CurMapType->addIncoming(MemberMapType, ToElseBB);
10197 
10198     llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
10199                                      CurSizeArg, CurMapType, CurNameArg};
10200     if (Info.Mappers[I]) {
10201       // Call the corresponding mapper function.
10202       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
10203           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
10204       assert(MapperFunc && "Expect a valid mapper function is available.");
10205       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
10206     } else {
10207       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10208       // data structure.
10209       MapperCGF.EmitRuntimeCall(
10210           OMPBuilder.getOrCreateRuntimeFunction(
10211               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
10212           OffloadingArgs);
10213     }
10214   }
10215 
10216   // Update the pointer to point to the next element that needs to be mapped,
10217   // and check whether we have mapped all elements.
10218   llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
10219   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
10220       ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
10221   PtrPHI->addIncoming(PtrNext, LastBB);
10222   llvm::Value *IsDone =
10223       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
10224   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
10225   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10226 
10227   MapperCGF.EmitBlock(ExitBB);
10228   // Emit array deletion if this is an array section and \p MapType indicates
10229   // that deletion is required.
10230   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
10231                              MapName, ElementSize, DoneBB, /*IsInit=*/false);
10232 
10233   // Emit the function exit block.
10234   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
10235   MapperCGF.FinishFunction();
10236   UDMMap.try_emplace(D, Fn);
10237   if (CGF) {
10238     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
10239     Decls.second.push_back(D);
10240   }
10241 }
10242 
10243 /// Emit the array initialization or deletion portion for user-defined mapper
10244 /// code generation. First, it evaluates whether an array section is mapped and
10245 /// whether the \a MapType instructs to delete this section. If \a IsInit is
10246 /// true, and \a MapType indicates to not delete this array, array
10247 /// initialization code is generated. If \a IsInit is false, and \a MapType
10248 /// indicates to not this array, array deletion code is generated.
10249 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
10250     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
10251     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
10252     llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
10253     bool IsInit) {
10254   StringRef Prefix = IsInit ? ".init" : ".del";
10255 
10256   // Evaluate if this is an array section.
10257   llvm::BasicBlock *BodyBB =
10258       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
10259   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
10260       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
10261   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
10262       MapType,
10263       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
10264   llvm::Value *DeleteCond;
10265   llvm::Value *Cond;
10266   if (IsInit) {
10267     // base != begin?
10268     llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
10269     // IsPtrAndObj?
10270     llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
10271         MapType,
10272         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
10273     PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
10274     BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
10275     Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
10276     DeleteCond = MapperCGF.Builder.CreateIsNull(
10277         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10278   } else {
10279     Cond = IsArray;
10280     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
10281         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10282   }
10283   Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
10284   MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
10285 
10286   MapperCGF.EmitBlock(BodyBB);
10287   // Get the array size by multiplying element size and element number (i.e., \p
10288   // Size).
10289   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
10290       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
10291   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
10292   // memory allocation/deletion purpose only.
10293   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
10294       MapType,
10295       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10296                                    MappableExprsHandler::OMP_MAP_FROM)));
10297   MapTypeArg = MapperCGF.Builder.CreateOr(
10298       MapTypeArg,
10299       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));
10300 
10301   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10302   // data structure.
10303   llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
10304                                    ArraySize, MapTypeArg, MapName};
10305   MapperCGF.EmitRuntimeCall(
10306       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10307                                             OMPRTL___tgt_push_mapper_component),
10308       OffloadingArgs);
10309 }
10310 
10311 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10312     const OMPDeclareMapperDecl *D) {
10313   auto I = UDMMap.find(D);
10314   if (I != UDMMap.end())
10315     return I->second;
10316   emitUserDefinedMapper(D);
10317   return UDMMap.lookup(D);
10318 }
10319 
10320 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10321     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10322     llvm::Value *DeviceID,
10323     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10324                                      const OMPLoopDirective &D)>
10325         SizeEmitter) {
10326   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10327   const OMPExecutableDirective *TD = &D;
10328   // Get nested teams distribute kind directive, if any.
10329   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10330     TD = getNestedDistributeDirective(CGM.getContext(), D);
10331   if (!TD)
10332     return;
10333   const auto *LD = cast<OMPLoopDirective>(TD);
10334   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10335                                                          PrePostActionTy &) {
10336     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10337       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10338       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10339       CGF.EmitRuntimeCall(
10340           OMPBuilder.getOrCreateRuntimeFunction(
10341               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10342           Args);
10343     }
10344   };
10345   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10346 }
10347 
10348 void CGOpenMPRuntime::emitTargetCall(
10349     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10350     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10351     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10352     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10353                                      const OMPLoopDirective &D)>
10354         SizeEmitter) {
10355   if (!CGF.HaveInsertPoint())
10356     return;
10357 
10358   const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
10359                                    CGM.getLangOpts().OpenMPOffloadMandatory;
10360 
10361   assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
10362 
10363   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10364                                  D.hasClausesOfKind<OMPNowaitClause>();
10365   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10366   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10367   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10368                                             PrePostActionTy &) {
10369     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10370   };
10371   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10372 
10373   CodeGenFunction::OMPTargetDataInfo InputInfo;
10374   llvm::Value *MapTypesArray = nullptr;
10375   llvm::Value *MapNamesArray = nullptr;
10376   // Generate code for the host fallback function.
10377   auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
10378                         &CS, OffloadingMandatory](CodeGenFunction &CGF) {
10379     if (OffloadingMandatory) {
10380       CGF.Builder.CreateUnreachable();
10381     } else {
10382       if (RequiresOuterTask) {
10383         CapturedVars.clear();
10384         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10385       }
10386       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10387     }
10388   };
10389   // Fill up the pointer arrays and transfer execution to the device.
10390   auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
10391                     &MapNamesArray, SizeEmitter,
10392                     FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
10393     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10394       // Reverse offloading is not supported, so just execute on the host.
10395       FallbackGen(CGF);
10396       return;
10397     }
10398 
10399     // On top of the arrays that were filled up, the target offloading call
10400     // takes as arguments the device id as well as the host pointer. The host
10401     // pointer is used by the runtime library to identify the current target
10402     // region, so it only has to be unique and not necessarily point to
10403     // anything. It could be the pointer to the outlined function that
10404     // implements the target region, but we aren't using that so that the
10405     // compiler doesn't need to keep that, and could therefore inline the host
10406     // function if proven worthwhile during optimization.
10407 
10408     // From this point on, we need to have an ID of the target region defined.
10409     assert(OutlinedFnID && "Invalid outlined function ID!");
10410     (void)OutlinedFnID;
10411 
10412     // Emit device ID if any.
10413     llvm::Value *DeviceID;
10414     if (Device.getPointer()) {
10415       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10416               Device.getInt() == OMPC_DEVICE_device_num) &&
10417              "Expected device_num modifier.");
10418       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10419       DeviceID =
10420           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10421     } else {
10422       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10423     }
10424 
10425     // Emit the number of elements in the offloading arrays.
10426     llvm::Value *PointerNum =
10427         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10428 
10429     // Return value of the runtime offloading call.
10430     llvm::Value *Return;
10431 
10432     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10433     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10434 
10435     // Source location for the ident struct
10436     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10437 
10438     // Emit tripcount for the target loop-based directive.
10439     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10440 
10441     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10442     // The target region is an outlined function launched by the runtime
10443     // via calls __tgt_target() or __tgt_target_teams().
10444     //
10445     // __tgt_target() launches a target region with one team and one thread,
10446     // executing a serial region.  This master thread may in turn launch
10447     // more threads within its team upon encountering a parallel region,
10448     // however, no additional teams can be launched on the device.
10449     //
10450     // __tgt_target_teams() launches a target region with one or more teams,
10451     // each with one or more threads.  This call is required for target
10452     // constructs such as:
10453     //  'target teams'
10454     //  'target' / 'teams'
10455     //  'target teams distribute parallel for'
10456     //  'target parallel'
10457     // and so on.
10458     //
10459     // Note that on the host and CPU targets, the runtime implementation of
10460     // these calls simply call the outlined function without forking threads.
10461     // The outlined functions themselves have runtime calls to
10462     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10463     // the compiler in emitTeamsCall() and emitParallelCall().
10464     //
10465     // In contrast, on the NVPTX target, the implementation of
10466     // __tgt_target_teams() launches a GPU kernel with the requested number
10467     // of teams and threads so no additional calls to the runtime are required.
10468     if (NumTeams) {
10469       // If we have NumTeams defined this means that we have an enclosed teams
10470       // region. Therefore we also expect to have NumThreads defined. These two
10471       // values should be defined in the presence of a teams directive,
10472       // regardless of having any clauses associated. If the user is using teams
10473       // but no clauses, these two values will be the default that should be
10474       // passed to the runtime library - a 32-bit integer with the value zero.
10475       assert(NumThreads && "Thread limit expression should be available along "
10476                            "with number of teams.");
10477       SmallVector<llvm::Value *> OffloadingArgs = {
10478           RTLoc,
10479           DeviceID,
10480           OutlinedFnID,
10481           PointerNum,
10482           InputInfo.BasePointersArray.getPointer(),
10483           InputInfo.PointersArray.getPointer(),
10484           InputInfo.SizesArray.getPointer(),
10485           MapTypesArray,
10486           MapNamesArray,
10487           InputInfo.MappersArray.getPointer(),
10488           NumTeams,
10489           NumThreads};
10490       if (HasNowait) {
10491         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10492         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10493         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10494         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10495         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10496         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10497       }
10498       Return = CGF.EmitRuntimeCall(
10499           OMPBuilder.getOrCreateRuntimeFunction(
10500               CGM.getModule(), HasNowait
10501                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10502                                    : OMPRTL___tgt_target_teams_mapper),
10503           OffloadingArgs);
10504     } else {
10505       SmallVector<llvm::Value *> OffloadingArgs = {
10506           RTLoc,
10507           DeviceID,
10508           OutlinedFnID,
10509           PointerNum,
10510           InputInfo.BasePointersArray.getPointer(),
10511           InputInfo.PointersArray.getPointer(),
10512           InputInfo.SizesArray.getPointer(),
10513           MapTypesArray,
10514           MapNamesArray,
10515           InputInfo.MappersArray.getPointer()};
10516       if (HasNowait) {
10517         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10518         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10519         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10520         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10521         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10522         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10523       }
10524       Return = CGF.EmitRuntimeCall(
10525           OMPBuilder.getOrCreateRuntimeFunction(
10526               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10527                                          : OMPRTL___tgt_target_mapper),
10528           OffloadingArgs);
10529     }
10530 
10531     // Check the error code and execute the host version if required.
10532     llvm::BasicBlock *OffloadFailedBlock =
10533         CGF.createBasicBlock("omp_offload.failed");
10534     llvm::BasicBlock *OffloadContBlock =
10535         CGF.createBasicBlock("omp_offload.cont");
10536     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10537     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10538 
10539     CGF.EmitBlock(OffloadFailedBlock);
10540     FallbackGen(CGF);
10541 
10542     CGF.EmitBranch(OffloadContBlock);
10543 
10544     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10545   };
10546 
10547   // Notify that the host version must be executed.
10548   auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
10549     FallbackGen(CGF);
10550   };
10551 
10552   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10553                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10554                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10555     // Fill up the arrays with all the captured variables.
10556     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10557 
10558     // Get mappable expression information.
10559     MappableExprsHandler MEHandler(D, CGF);
10560     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10561     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10562 
10563     auto RI = CS.getCapturedRecordDecl()->field_begin();
10564     auto *CV = CapturedVars.begin();
10565     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10566                                               CE = CS.capture_end();
10567          CI != CE; ++CI, ++RI, ++CV) {
10568       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10569       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10570 
10571       // VLA sizes are passed to the outlined region by copy and do not have map
10572       // information associated.
10573       if (CI->capturesVariableArrayType()) {
10574         CurInfo.Exprs.push_back(nullptr);
10575         CurInfo.BasePointers.push_back(*CV);
10576         CurInfo.Pointers.push_back(*CV);
10577         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10578             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10579         // Copy to the device as an argument. No need to retrieve it.
10580         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10581                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10582                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10583         CurInfo.Mappers.push_back(nullptr);
10584       } else {
10585         // If we have any information in the map clause, we use it, otherwise we
10586         // just do a default mapping.
10587         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10588         if (!CI->capturesThis())
10589           MappedVarSet.insert(CI->getCapturedVar());
10590         else
10591           MappedVarSet.insert(nullptr);
10592         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10593           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10594         // Generate correct mapping for variables captured by reference in
10595         // lambdas.
10596         if (CI->capturesVariable())
10597           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10598                                                   CurInfo, LambdaPointers);
10599       }
10600       // We expect to have at least an element of information for this capture.
10601       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10602              "Non-existing map pointer for capture!");
10603       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10604              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10605              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10606              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10607              "Inconsistent map information sizes!");
10608 
10609       // If there is an entry in PartialStruct it means we have a struct with
10610       // individual members mapped. Emit an extra combined entry.
10611       if (PartialStruct.Base.isValid()) {
10612         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10613         MEHandler.emitCombinedEntry(
10614             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10615             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10616       }
10617 
10618       // We need to append the results of this capture to what we already have.
10619       CombinedInfo.append(CurInfo);
10620     }
10621     // Adjust MEMBER_OF flags for the lambdas captures.
10622     MEHandler.adjustMemberOfForLambdaCaptures(
10623         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10624         CombinedInfo.Types);
10625     // Map any list items in a map clause that were not captures because they
10626     // weren't referenced within the construct.
10627     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10628 
10629     TargetDataInfo Info;
10630     // Fill up the arrays and create the arguments.
10631     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10632     emitOffloadingArraysArgument(
10633         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10634         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10635         {/*ForEndCall=*/false});
10636 
10637     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10638     InputInfo.BasePointersArray =
10639         Address::deprecated(Info.BasePointersArray, CGM.getPointerAlign());
10640     InputInfo.PointersArray =
10641         Address::deprecated(Info.PointersArray, CGM.getPointerAlign());
10642     InputInfo.SizesArray =
10643         Address::deprecated(Info.SizesArray, CGM.getPointerAlign());
10644     InputInfo.MappersArray =
10645         Address::deprecated(Info.MappersArray, CGM.getPointerAlign());
10646     MapTypesArray = Info.MapTypesArray;
10647     MapNamesArray = Info.MapNamesArray;
10648     if (RequiresOuterTask)
10649       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10650     else
10651       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10652   };
10653 
10654   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10655                              CodeGenFunction &CGF, PrePostActionTy &) {
10656     if (RequiresOuterTask) {
10657       CodeGenFunction::OMPTargetDataInfo InputInfo;
10658       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10659     } else {
10660       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10661     }
10662   };
10663 
10664   // If we have a target function ID it means that we need to support
10665   // offloading, otherwise, just execute on the host. We need to execute on host
10666   // regardless of the conditional in the if clause if, e.g., the user do not
10667   // specify target triples.
10668   if (OutlinedFnID) {
10669     if (IfCond) {
10670       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10671     } else {
10672       RegionCodeGenTy ThenRCG(TargetThenGen);
10673       ThenRCG(CGF);
10674     }
10675   } else {
10676     RegionCodeGenTy ElseRCG(TargetElseGen);
10677     ElseRCG(CGF);
10678   }
10679 }
10680 
10681 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10682                                                     StringRef ParentName) {
10683   if (!S)
10684     return;
10685 
10686   // Codegen OMP target directives that offload compute to the device.
10687   bool RequiresDeviceCodegen =
10688       isa<OMPExecutableDirective>(S) &&
10689       isOpenMPTargetExecutionDirective(
10690           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10691 
10692   if (RequiresDeviceCodegen) {
10693     const auto &E = *cast<OMPExecutableDirective>(S);
10694     unsigned DeviceID;
10695     unsigned FileID;
10696     unsigned Line;
10697     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10698                              FileID, Line);
10699 
10700     // Is this a target region that should not be emitted as an entry point? If
10701     // so just signal we are done with this target region.
10702     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10703                                                             ParentName, Line))
10704       return;
10705 
10706     switch (E.getDirectiveKind()) {
10707     case OMPD_target:
10708       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10709                                                    cast<OMPTargetDirective>(E));
10710       break;
10711     case OMPD_target_parallel:
10712       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10713           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10714       break;
10715     case OMPD_target_teams:
10716       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10717           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10718       break;
10719     case OMPD_target_teams_distribute:
10720       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10721           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10722       break;
10723     case OMPD_target_teams_distribute_simd:
10724       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10725           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10726       break;
10727     case OMPD_target_parallel_for:
10728       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10729           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10730       break;
10731     case OMPD_target_parallel_for_simd:
10732       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10733           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10734       break;
10735     case OMPD_target_simd:
10736       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10737           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10738       break;
10739     case OMPD_target_teams_distribute_parallel_for:
10740       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10741           CGM, ParentName,
10742           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10743       break;
10744     case OMPD_target_teams_distribute_parallel_for_simd:
10745       CodeGenFunction::
10746           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10747               CGM, ParentName,
10748               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10749       break;
10750     case OMPD_parallel:
10751     case OMPD_for:
10752     case OMPD_parallel_for:
10753     case OMPD_parallel_master:
10754     case OMPD_parallel_sections:
10755     case OMPD_for_simd:
10756     case OMPD_parallel_for_simd:
10757     case OMPD_cancel:
10758     case OMPD_cancellation_point:
10759     case OMPD_ordered:
10760     case OMPD_threadprivate:
10761     case OMPD_allocate:
10762     case OMPD_task:
10763     case OMPD_simd:
10764     case OMPD_tile:
10765     case OMPD_unroll:
10766     case OMPD_sections:
10767     case OMPD_section:
10768     case OMPD_single:
10769     case OMPD_master:
10770     case OMPD_critical:
10771     case OMPD_taskyield:
10772     case OMPD_barrier:
10773     case OMPD_taskwait:
10774     case OMPD_taskgroup:
10775     case OMPD_atomic:
10776     case OMPD_flush:
10777     case OMPD_depobj:
10778     case OMPD_scan:
10779     case OMPD_teams:
10780     case OMPD_target_data:
10781     case OMPD_target_exit_data:
10782     case OMPD_target_enter_data:
10783     case OMPD_distribute:
10784     case OMPD_distribute_simd:
10785     case OMPD_distribute_parallel_for:
10786     case OMPD_distribute_parallel_for_simd:
10787     case OMPD_teams_distribute:
10788     case OMPD_teams_distribute_simd:
10789     case OMPD_teams_distribute_parallel_for:
10790     case OMPD_teams_distribute_parallel_for_simd:
10791     case OMPD_target_update:
10792     case OMPD_declare_simd:
10793     case OMPD_declare_variant:
10794     case OMPD_begin_declare_variant:
10795     case OMPD_end_declare_variant:
10796     case OMPD_declare_target:
10797     case OMPD_end_declare_target:
10798     case OMPD_declare_reduction:
10799     case OMPD_declare_mapper:
10800     case OMPD_taskloop:
10801     case OMPD_taskloop_simd:
10802     case OMPD_master_taskloop:
10803     case OMPD_master_taskloop_simd:
10804     case OMPD_parallel_master_taskloop:
10805     case OMPD_parallel_master_taskloop_simd:
10806     case OMPD_requires:
10807     case OMPD_metadirective:
10808     case OMPD_unknown:
10809     default:
10810       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10811     }
10812     return;
10813   }
10814 
10815   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10816     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10817       return;
10818 
10819     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10820     return;
10821   }
10822 
10823   // If this is a lambda function, look into its body.
10824   if (const auto *L = dyn_cast<LambdaExpr>(S))
10825     S = L->getBody();
10826 
10827   // Keep looking for target regions recursively.
10828   for (const Stmt *II : S->children())
10829     scanForTargetRegionsFunctions(II, ParentName);
10830 }
10831 
10832 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10833   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10834       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10835   if (!DevTy)
10836     return false;
10837   // Do not emit device_type(nohost) functions for the host.
10838   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10839     return true;
10840   // Do not emit device_type(host) functions for the device.
10841   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10842     return true;
10843   return false;
10844 }
10845 
10846 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10847   // If emitting code for the host, we do not process FD here. Instead we do
10848   // the normal code generation.
10849   if (!CGM.getLangOpts().OpenMPIsDevice) {
10850     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10851       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10852                                   CGM.getLangOpts().OpenMPIsDevice))
10853         return true;
10854     return false;
10855   }
10856 
10857   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10858   // Try to detect target regions in the function.
10859   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10860     StringRef Name = CGM.getMangledName(GD);
10861     scanForTargetRegionsFunctions(FD->getBody(), Name);
10862     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10863                                 CGM.getLangOpts().OpenMPIsDevice))
10864       return true;
10865   }
10866 
10867   // Do not to emit function if it is not marked as declare target.
10868   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10869          AlreadyEmittedTargetDecls.count(VD) == 0;
10870 }
10871 
10872 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10873   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10874                               CGM.getLangOpts().OpenMPIsDevice))
10875     return true;
10876 
10877   if (!CGM.getLangOpts().OpenMPIsDevice)
10878     return false;
10879 
10880   // Check if there are Ctors/Dtors in this declaration and look for target
10881   // regions in it. We use the complete variant to produce the kernel name
10882   // mangling.
10883   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10884   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10885     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10886       StringRef ParentName =
10887           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10888       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10889     }
10890     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10891       StringRef ParentName =
10892           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10893       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10894     }
10895   }
10896 
10897   // Do not to emit variable if it is not marked as declare target.
10898   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10899       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10900           cast<VarDecl>(GD.getDecl()));
10901   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10902       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10903        HasRequiresUnifiedSharedMemory)) {
10904     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10905     return true;
10906   }
10907   return false;
10908 }
10909 
10910 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10911                                                    llvm::Constant *Addr) {
10912   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10913       !CGM.getLangOpts().OpenMPIsDevice)
10914     return;
10915 
10916   // If we have host/nohost variables, they do not need to be registered.
10917   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10918       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10919   if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
10920     return;
10921 
10922   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10923       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10924   if (!Res) {
10925     if (CGM.getLangOpts().OpenMPIsDevice) {
10926       // Register non-target variables being emitted in device code (debug info
10927       // may cause this).
10928       StringRef VarName = CGM.getMangledName(VD);
10929       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10930     }
10931     return;
10932   }
10933   // Register declare target variables.
10934   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10935   StringRef VarName;
10936   CharUnits VarSize;
10937   llvm::GlobalValue::LinkageTypes Linkage;
10938 
10939   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10940       !HasRequiresUnifiedSharedMemory) {
10941     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10942     VarName = CGM.getMangledName(VD);
10943     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10944       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10945       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10946     } else {
10947       VarSize = CharUnits::Zero();
10948     }
10949     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10950     // Temp solution to prevent optimizations of the internal variables.
10951     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10952       // Do not create a "ref-variable" if the original is not also available
10953       // on the host.
10954       if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10955         return;
10956       std::string RefName = getName({VarName, "ref"});
10957       if (!CGM.GetGlobalValue(RefName)) {
10958         llvm::Constant *AddrRef =
10959             getOrCreateInternalVariable(Addr->getType(), RefName);
10960         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10961         GVAddrRef->setConstant(/*Val=*/true);
10962         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10963         GVAddrRef->setInitializer(Addr);
10964         CGM.addCompilerUsedGlobal(GVAddrRef);
10965       }
10966     }
10967   } else {
10968     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10969             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10970              HasRequiresUnifiedSharedMemory)) &&
10971            "Declare target attribute must link or to with unified memory.");
10972     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10973       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10974     else
10975       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10976 
10977     if (CGM.getLangOpts().OpenMPIsDevice) {
10978       VarName = Addr->getName();
10979       Addr = nullptr;
10980     } else {
10981       VarName = getAddrOfDeclareTargetVar(VD).getName();
10982       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10983     }
10984     VarSize = CGM.getPointerSize();
10985     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10986   }
10987 
10988   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10989       VarName, Addr, VarSize, Flags, Linkage);
10990 }
10991 
10992 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10993   if (isa<FunctionDecl>(GD.getDecl()) ||
10994       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10995     return emitTargetFunctions(GD);
10996 
10997   return emitTargetGlobalVariable(GD);
10998 }
10999 
11000 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
11001   for (const VarDecl *VD : DeferredGlobalVariables) {
11002     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11003         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
11004     if (!Res)
11005       continue;
11006     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
11007         !HasRequiresUnifiedSharedMemory) {
11008       CGM.EmitGlobal(VD);
11009     } else {
11010       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
11011               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
11012                HasRequiresUnifiedSharedMemory)) &&
11013              "Expected link clause or to clause with unified memory.");
11014       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
11015     }
11016   }
11017 }
11018 
/// This implementation performs no adjustment: it only asserts that \p D is a
/// target execution directive. \p CGF is not used.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  // Sanity check only; it compiles away when assertions are disabled.
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
11024 
11025 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
11026   for (const OMPClause *Clause : D->clauselists()) {
11027     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
11028       HasRequiresUnifiedSharedMemory = true;
11029     } else if (const auto *AC =
11030                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
11031       switch (AC->getAtomicDefaultMemOrderKind()) {
11032       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
11033         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
11034         break;
11035       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
11036         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
11037         break;
11038       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
11039         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
11040         break;
11041       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
11042         break;
11043       }
11044     }
11045   }
11046 }
11047 
11048 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
11049   return RequiresAtomicOrdering;
11050 }
11051 
11052 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11053                                                        LangAS &AS) {
11054   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11055     return false;
11056   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11057   switch(A->getAllocatorType()) {
11058   case OMPAllocateDeclAttr::OMPNullMemAlloc:
11059   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11060   // Not supported, fallback to the default mem space.
11061   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11062   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11063   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11064   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11065   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11066   case OMPAllocateDeclAttr::OMPConstMemAlloc:
11067   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11068     AS = LangAS::Default;
11069     return true;
11070   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11071     llvm_unreachable("Expected predefined allocator for the variables with the "
11072                      "static storage.");
11073   }
11074   return false;
11075 }
11076 
11077 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
11078   return HasRequiresUnifiedSharedMemory;
11079 }
11080 
11081 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11082     CodeGenModule &CGM)
11083     : CGM(CGM) {
11084   if (CGM.getLangOpts().OpenMPIsDevice) {
11085     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11086     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11087   }
11088 }
11089 
11090 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11091   if (CGM.getLangOpts().OpenMPIsDevice)
11092     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11093 }
11094 
11095 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
11096   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
11097     return true;
11098 
11099   const auto *D = cast<FunctionDecl>(GD.getDecl());
11100   // Do not to emit function if it is marked as declare target as it was already
11101   // emitted.
11102   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11103     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11104       if (auto *F = dyn_cast_or_null<llvm::Function>(
11105               CGM.GetGlobalValue(CGM.getMangledName(GD))))
11106         return !F->isDeclaration();
11107       return false;
11108     }
11109     return true;
11110   }
11111 
11112   return !AlreadyEmittedTargetDecls.insert(D).second;
11113 }
11114 
11115 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
11116   // If we don't have entries or if we are emitting code for the device, we
11117   // don't need to do anything.
11118   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
11119       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
11120       (OffloadEntriesInfoManager.empty() &&
11121        !HasEmittedDeclareTargetRegion &&
11122        !HasEmittedTargetRegion))
11123     return nullptr;
11124 
11125   // Create and register the function that handles the requires directives.
11126   ASTContext &C = CGM.getContext();
11127 
11128   llvm::Function *RequiresRegFn;
11129   {
11130     CodeGenFunction CGF(CGM);
11131     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
11132     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
11133     std::string ReqName = getName({"omp_offloading", "requires_reg"});
11134     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
11135     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
11136     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
11137     // TODO: check for other requires clauses.
11138     // The requires directive takes effect only when a target region is
11139     // present in the compilation unit. Otherwise it is ignored and not
11140     // passed to the runtime. This avoids the runtime from throwing an error
11141     // for mismatching requires clauses across compilation units that don't
11142     // contain at least 1 target region.
11143     assert((HasEmittedTargetRegion ||
11144             HasEmittedDeclareTargetRegion ||
11145             !OffloadEntriesInfoManager.empty()) &&
11146            "Target or declare target region expected.");
11147     if (HasRequiresUnifiedSharedMemory)
11148       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
11149     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11150                             CGM.getModule(), OMPRTL___tgt_register_requires),
11151                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
11152     CGF.FinishFunction();
11153   }
11154   return RequiresRegFn;
11155 }
11156 
11157 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11158                                     const OMPExecutableDirective &D,
11159                                     SourceLocation Loc,
11160                                     llvm::Function *OutlinedFn,
11161                                     ArrayRef<llvm::Value *> CapturedVars) {
11162   if (!CGF.HaveInsertPoint())
11163     return;
11164 
11165   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11166   CodeGenFunction::RunCleanupsScope Scope(CGF);
11167 
11168   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11169   llvm::Value *Args[] = {
11170       RTLoc,
11171       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11172       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11173   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11174   RealArgs.append(std::begin(Args), std::end(Args));
11175   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11176 
11177   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11178       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11179   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11180 }
11181 
11182 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11183                                          const Expr *NumTeams,
11184                                          const Expr *ThreadLimit,
11185                                          SourceLocation Loc) {
11186   if (!CGF.HaveInsertPoint())
11187     return;
11188 
11189   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11190 
11191   llvm::Value *NumTeamsVal =
11192       NumTeams
11193           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11194                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11195           : CGF.Builder.getInt32(0);
11196 
11197   llvm::Value *ThreadLimitVal =
11198       ThreadLimit
11199           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11200                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11201           : CGF.Builder.getInt32(0);
11202 
11203   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11204   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11205                                      ThreadLimitVal};
11206   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11207                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11208                       PushNumTeamsArgs);
11209 }
11210 
11211 void CGOpenMPRuntime::emitTargetDataCalls(
11212     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11213     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11214   if (!CGF.HaveInsertPoint())
11215     return;
11216 
11217   // Action used to replace the default codegen action and turn privatization
11218   // off.
11219   PrePostActionTy NoPrivAction;
11220 
11221   // Generate the code for the opening of the data environment. Capture all the
11222   // arguments of the runtime call by reference because they are used in the
11223   // closing of the region.
11224   auto &&BeginThenGen = [this, &D, Device, &Info,
11225                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11226     // Fill up the arrays with all the mapped variables.
11227     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11228 
11229     // Get map clause information.
11230     MappableExprsHandler MEHandler(D, CGF);
11231     MEHandler.generateAllInfo(CombinedInfo);
11232 
11233     // Fill up the arrays and create the arguments.
11234     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11235                          /*IsNonContiguous=*/true);
11236 
11237     llvm::Value *BasePointersArrayArg = nullptr;
11238     llvm::Value *PointersArrayArg = nullptr;
11239     llvm::Value *SizesArrayArg = nullptr;
11240     llvm::Value *MapTypesArrayArg = nullptr;
11241     llvm::Value *MapNamesArrayArg = nullptr;
11242     llvm::Value *MappersArrayArg = nullptr;
11243     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11244                                  SizesArrayArg, MapTypesArrayArg,
11245                                  MapNamesArrayArg, MappersArrayArg, Info);
11246 
11247     // Emit device ID if any.
11248     llvm::Value *DeviceID = nullptr;
11249     if (Device) {
11250       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11251                                            CGF.Int64Ty, /*isSigned=*/true);
11252     } else {
11253       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11254     }
11255 
11256     // Emit the number of elements in the offloading arrays.
11257     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11258     //
11259     // Source location for the ident struct
11260     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11261 
11262     llvm::Value *OffloadingArgs[] = {RTLoc,
11263                                      DeviceID,
11264                                      PointerNum,
11265                                      BasePointersArrayArg,
11266                                      PointersArrayArg,
11267                                      SizesArrayArg,
11268                                      MapTypesArrayArg,
11269                                      MapNamesArrayArg,
11270                                      MappersArrayArg};
11271     CGF.EmitRuntimeCall(
11272         OMPBuilder.getOrCreateRuntimeFunction(
11273             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11274         OffloadingArgs);
11275 
11276     // If device pointer privatization is required, emit the body of the region
11277     // here. It will have to be duplicated: with and without privatization.
11278     if (!Info.CaptureDeviceAddrMap.empty())
11279       CodeGen(CGF);
11280   };
11281 
11282   // Generate code for the closing of the data region.
11283   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11284                                                 PrePostActionTy &) {
11285     assert(Info.isValid() && "Invalid data environment closing arguments.");
11286 
11287     llvm::Value *BasePointersArrayArg = nullptr;
11288     llvm::Value *PointersArrayArg = nullptr;
11289     llvm::Value *SizesArrayArg = nullptr;
11290     llvm::Value *MapTypesArrayArg = nullptr;
11291     llvm::Value *MapNamesArrayArg = nullptr;
11292     llvm::Value *MappersArrayArg = nullptr;
11293     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11294                                  SizesArrayArg, MapTypesArrayArg,
11295                                  MapNamesArrayArg, MappersArrayArg, Info,
11296                                  {/*ForEndCall=*/true});
11297 
11298     // Emit device ID if any.
11299     llvm::Value *DeviceID = nullptr;
11300     if (Device) {
11301       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11302                                            CGF.Int64Ty, /*isSigned=*/true);
11303     } else {
11304       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11305     }
11306 
11307     // Emit the number of elements in the offloading arrays.
11308     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11309 
11310     // Source location for the ident struct
11311     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11312 
11313     llvm::Value *OffloadingArgs[] = {RTLoc,
11314                                      DeviceID,
11315                                      PointerNum,
11316                                      BasePointersArrayArg,
11317                                      PointersArrayArg,
11318                                      SizesArrayArg,
11319                                      MapTypesArrayArg,
11320                                      MapNamesArrayArg,
11321                                      MappersArrayArg};
11322     CGF.EmitRuntimeCall(
11323         OMPBuilder.getOrCreateRuntimeFunction(
11324             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11325         OffloadingArgs);
11326   };
11327 
11328   // If we need device pointer privatization, we need to emit the body of the
11329   // region with no privatization in the 'else' branch of the conditional.
11330   // Otherwise, we don't have to do anything.
11331   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11332                                                          PrePostActionTy &) {
11333     if (!Info.CaptureDeviceAddrMap.empty()) {
11334       CodeGen.setAction(NoPrivAction);
11335       CodeGen(CGF);
11336     }
11337   };
11338 
11339   // We don't have to do anything to close the region if the if clause evaluates
11340   // to false.
11341   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11342 
11343   if (IfCond) {
11344     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11345   } else {
11346     RegionCodeGenTy RCG(BeginThenGen);
11347     RCG(CGF);
11348   }
11349 
11350   // If we don't require privatization of device pointers, we emit the body in
11351   // between the runtime calls. This avoids duplicating the body code.
11352   if (Info.CaptureDeviceAddrMap.empty()) {
11353     CodeGen.setAction(NoPrivAction);
11354     CodeGen(CGF);
11355   }
11356 
11357   if (IfCond) {
11358     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11359   } else {
11360     RegionCodeGenTy RCG(EndThenGen);
11361     RCG(CGF);
11362   }
11363 }
11364 
11365 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
11366     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11367     const Expr *Device) {
11368   if (!CGF.HaveInsertPoint())
11369     return;
11370 
11371   assert((isa<OMPTargetEnterDataDirective>(D) ||
11372           isa<OMPTargetExitDataDirective>(D) ||
11373           isa<OMPTargetUpdateDirective>(D)) &&
11374          "Expecting either target enter, exit data, or update directives.");
11375 
11376   CodeGenFunction::OMPTargetDataInfo InputInfo;
11377   llvm::Value *MapTypesArray = nullptr;
11378   llvm::Value *MapNamesArray = nullptr;
11379   // Generate the code for the opening of the data environment.
11380   auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11381                     &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11382     // Emit device ID if any.
11383     llvm::Value *DeviceID = nullptr;
11384     if (Device) {
11385       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11386                                            CGF.Int64Ty, /*isSigned=*/true);
11387     } else {
11388       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11389     }
11390 
11391     // Emit the number of elements in the offloading arrays.
11392     llvm::Constant *PointerNum =
11393         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11394 
11395     // Source location for the ident struct
11396     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11397 
11398     llvm::Value *OffloadingArgs[] = {RTLoc,
11399                                      DeviceID,
11400                                      PointerNum,
11401                                      InputInfo.BasePointersArray.getPointer(),
11402                                      InputInfo.PointersArray.getPointer(),
11403                                      InputInfo.SizesArray.getPointer(),
11404                                      MapTypesArray,
11405                                      MapNamesArray,
11406                                      InputInfo.MappersArray.getPointer()};
11407 
11408     // Select the right runtime function call for each standalone
11409     // directive.
11410     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11411     RuntimeFunction RTLFn;
11412     switch (D.getDirectiveKind()) {
11413     case OMPD_target_enter_data:
11414       RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11415                         : OMPRTL___tgt_target_data_begin_mapper;
11416       break;
11417     case OMPD_target_exit_data:
11418       RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11419                         : OMPRTL___tgt_target_data_end_mapper;
11420       break;
11421     case OMPD_target_update:
11422       RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11423                         : OMPRTL___tgt_target_data_update_mapper;
11424       break;
11425     case OMPD_parallel:
11426     case OMPD_for:
11427     case OMPD_parallel_for:
11428     case OMPD_parallel_master:
11429     case OMPD_parallel_sections:
11430     case OMPD_for_simd:
11431     case OMPD_parallel_for_simd:
11432     case OMPD_cancel:
11433     case OMPD_cancellation_point:
11434     case OMPD_ordered:
11435     case OMPD_threadprivate:
11436     case OMPD_allocate:
11437     case OMPD_task:
11438     case OMPD_simd:
11439     case OMPD_tile:
11440     case OMPD_unroll:
11441     case OMPD_sections:
11442     case OMPD_section:
11443     case OMPD_single:
11444     case OMPD_master:
11445     case OMPD_critical:
11446     case OMPD_taskyield:
11447     case OMPD_barrier:
11448     case OMPD_taskwait:
11449     case OMPD_taskgroup:
11450     case OMPD_atomic:
11451     case OMPD_flush:
11452     case OMPD_depobj:
11453     case OMPD_scan:
11454     case OMPD_teams:
11455     case OMPD_target_data:
11456     case OMPD_distribute:
11457     case OMPD_distribute_simd:
11458     case OMPD_distribute_parallel_for:
11459     case OMPD_distribute_parallel_for_simd:
11460     case OMPD_teams_distribute:
11461     case OMPD_teams_distribute_simd:
11462     case OMPD_teams_distribute_parallel_for:
11463     case OMPD_teams_distribute_parallel_for_simd:
11464     case OMPD_declare_simd:
11465     case OMPD_declare_variant:
11466     case OMPD_begin_declare_variant:
11467     case OMPD_end_declare_variant:
11468     case OMPD_declare_target:
11469     case OMPD_end_declare_target:
11470     case OMPD_declare_reduction:
11471     case OMPD_declare_mapper:
11472     case OMPD_taskloop:
11473     case OMPD_taskloop_simd:
11474     case OMPD_master_taskloop:
11475     case OMPD_master_taskloop_simd:
11476     case OMPD_parallel_master_taskloop:
11477     case OMPD_parallel_master_taskloop_simd:
11478     case OMPD_target:
11479     case OMPD_target_simd:
11480     case OMPD_target_teams_distribute:
11481     case OMPD_target_teams_distribute_simd:
11482     case OMPD_target_teams_distribute_parallel_for:
11483     case OMPD_target_teams_distribute_parallel_for_simd:
11484     case OMPD_target_teams:
11485     case OMPD_target_parallel:
11486     case OMPD_target_parallel_for:
11487     case OMPD_target_parallel_for_simd:
11488     case OMPD_requires:
11489     case OMPD_metadirective:
11490     case OMPD_unknown:
11491     default:
11492       llvm_unreachable("Unexpected standalone target data directive.");
11493       break;
11494     }
11495     CGF.EmitRuntimeCall(
11496         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11497         OffloadingArgs);
11498   };
11499 
11500   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11501                           &MapNamesArray](CodeGenFunction &CGF,
11502                                           PrePostActionTy &) {
11503     // Fill up the arrays with all the mapped variables.
11504     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11505 
11506     // Get map clause information.
11507     MappableExprsHandler MEHandler(D, CGF);
11508     MEHandler.generateAllInfo(CombinedInfo);
11509 
11510     TargetDataInfo Info;
11511     // Fill up the arrays and create the arguments.
11512     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11513                          /*IsNonContiguous=*/true);
11514     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11515                              D.hasClausesOfKind<OMPNowaitClause>();
11516     emitOffloadingArraysArgument(
11517         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
11518         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
11519         {/*ForEndCall=*/false});
11520     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11521     InputInfo.BasePointersArray =
11522         Address::deprecated(Info.BasePointersArray, CGM.getPointerAlign());
11523     InputInfo.PointersArray =
11524         Address::deprecated(Info.PointersArray, CGM.getPointerAlign());
11525     InputInfo.SizesArray =
11526         Address::deprecated(Info.SizesArray, CGM.getPointerAlign());
11527     InputInfo.MappersArray =
11528         Address::deprecated(Info.MappersArray, CGM.getPointerAlign());
11529     MapTypesArray = Info.MapTypesArray;
11530     MapNamesArray = Info.MapNamesArray;
11531     if (RequiresOuterTask)
11532       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11533     else
11534       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11535   };
11536 
11537   if (IfCond) {
11538     emitIfClause(CGF, IfCond, TargetThenGen,
11539                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
11540   } else {
11541     RegionCodeGenTy ThenRCG(TargetThenGen);
11542     ThenRCG(CGF);
11543   }
11544 }
11545 
11546 namespace {
11547   /// Kind of parameter in a function with 'declare simd' directive.
11548   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
11549   /// Attribute set of the parameter.
11550   struct ParamAttrTy {
11551     ParamKindTy Kind = Vector;
11552     llvm::APSInt StrideOrArg;
11553     llvm::APSInt Alignment;
11554   };
11555 } // namespace
11556 
11557 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11558                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11559   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11560   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11561   // of that clause. The VLEN value must be power of 2.
11562   // In other case the notion of the function`s "characteristic data type" (CDT)
11563   // is used to compute the vector length.
11564   // CDT is defined in the following order:
11565   //   a) For non-void function, the CDT is the return type.
11566   //   b) If the function has any non-uniform, non-linear parameters, then the
11567   //   CDT is the type of the first such parameter.
11568   //   c) If the CDT determined by a) or b) above is struct, union, or class
11569   //   type which is pass-by-value (except for the type that maps to the
11570   //   built-in complex data type), the characteristic data type is int.
11571   //   d) If none of the above three cases is applicable, the CDT is int.
11572   // The VLEN is then determined based on the CDT and the size of vector
11573   // register of that ISA for which current vector version is generated. The
11574   // VLEN is computed using the formula below:
11575   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11576   // where vector register size specified in section 3.2.1 Registers and the
11577   // Stack Frame of original AMD64 ABI document.
11578   QualType RetType = FD->getReturnType();
11579   if (RetType.isNull())
11580     return 0;
11581   ASTContext &C = FD->getASTContext();
11582   QualType CDT;
11583   if (!RetType.isNull() && !RetType->isVoidType()) {
11584     CDT = RetType;
11585   } else {
11586     unsigned Offset = 0;
11587     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11588       if (ParamAttrs[Offset].Kind == Vector)
11589         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11590       ++Offset;
11591     }
11592     if (CDT.isNull()) {
11593       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11594         if (ParamAttrs[I + Offset].Kind == Vector) {
11595           CDT = FD->getParamDecl(I)->getType();
11596           break;
11597         }
11598       }
11599     }
11600   }
11601   if (CDT.isNull())
11602     CDT = C.IntTy;
11603   CDT = CDT->getCanonicalTypeUnqualified();
11604   if (CDT->isRecordType() || CDT->isUnionType())
11605     CDT = C.IntTy;
11606   return C.getTypeSize(CDT);
11607 }
11608 
11609 static void
11610 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11611                            const llvm::APSInt &VLENVal,
11612                            ArrayRef<ParamAttrTy> ParamAttrs,
11613                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11614   struct ISADataTy {
11615     char ISA;
11616     unsigned VecRegSize;
11617   };
11618   ISADataTy ISAData[] = {
11619       {
11620           'b', 128
11621       }, // SSE
11622       {
11623           'c', 256
11624       }, // AVX
11625       {
11626           'd', 256
11627       }, // AVX2
11628       {
11629           'e', 512
11630       }, // AVX512
11631   };
11632   llvm::SmallVector<char, 2> Masked;
11633   switch (State) {
11634   case OMPDeclareSimdDeclAttr::BS_Undefined:
11635     Masked.push_back('N');
11636     Masked.push_back('M');
11637     break;
11638   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11639     Masked.push_back('N');
11640     break;
11641   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11642     Masked.push_back('M');
11643     break;
11644   }
11645   for (char Mask : Masked) {
11646     for (const ISADataTy &Data : ISAData) {
11647       SmallString<256> Buffer;
11648       llvm::raw_svector_ostream Out(Buffer);
11649       Out << "_ZGV" << Data.ISA << Mask;
11650       if (!VLENVal) {
11651         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11652         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11653         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11654       } else {
11655         Out << VLENVal;
11656       }
11657       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11658         switch (ParamAttr.Kind){
11659         case LinearWithVarStride:
11660           Out << 's' << ParamAttr.StrideOrArg;
11661           break;
11662         case Linear:
11663           Out << 'l';
11664           if (ParamAttr.StrideOrArg != 1)
11665             Out << ParamAttr.StrideOrArg;
11666           break;
11667         case Uniform:
11668           Out << 'u';
11669           break;
11670         case Vector:
11671           Out << 'v';
11672           break;
11673         }
11674         if (!!ParamAttr.Alignment)
11675           Out << 'a' << ParamAttr.Alignment;
11676       }
11677       Out << '_' << Fn->getName();
11678       Fn->addFnAttr(Out.str());
11679     }
11680   }
11681 }
11682 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11688 
11689 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11690 ///
11691 /// TODO: Need to implement the behavior for reference marked with a
11692 /// var or no linear modifiers (1.b in the section). For this, we
11693 /// need to extend ParamKindTy to support the linear modifiers.
11694 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11695   QT = QT.getCanonicalType();
11696 
11697   if (QT->isVoidType())
11698     return false;
11699 
11700   if (Kind == ParamKindTy::Uniform)
11701     return false;
11702 
11703   if (Kind == ParamKindTy::Linear)
11704     return false;
11705 
11706   // TODO: Handle linear references with modifiers
11707 
11708   if (Kind == ParamKindTy::LinearWithVarStride)
11709     return false;
11710 
11711   return true;
11712 }
11713 
11714 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11715 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11716   QT = QT.getCanonicalType();
11717   unsigned Size = C.getTypeSize(QT);
11718 
11719   // Only scalars and complex within 16 bytes wide set PVB to true.
11720   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11721     return false;
11722 
11723   if (QT->isFloatingType())
11724     return true;
11725 
11726   if (QT->isIntegerType())
11727     return true;
11728 
11729   if (QT->isPointerType())
11730     return true;
11731 
11732   // TODO: Add support for complex types (section 3.1.2, item 2).
11733 
11734   return false;
11735 }
11736 
11737 /// Computes the lane size (LS) of a return type or of an input parameter,
11738 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11739 /// TODO: Add support for references, section 3.2.1, item 1.
11740 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11741   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11742     QualType PTy = QT.getCanonicalType()->getPointeeType();
11743     if (getAArch64PBV(PTy, C))
11744       return C.getTypeSize(PTy);
11745   }
11746   if (getAArch64PBV(QT, C))
11747     return C.getTypeSize(QT);
11748 
11749   return C.getTypeSize(C.getUIntPtrType());
11750 }
11751 
11752 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11753 // signature of the scalar function, as defined in 3.2.2 of the
11754 // AAVFABI.
11755 static std::tuple<unsigned, unsigned, bool>
11756 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11757   QualType RetType = FD->getReturnType().getCanonicalType();
11758 
11759   ASTContext &C = FD->getASTContext();
11760 
11761   bool OutputBecomesInput = false;
11762 
11763   llvm::SmallVector<unsigned, 8> Sizes;
11764   if (!RetType->isVoidType()) {
11765     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11766     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11767       OutputBecomesInput = true;
11768   }
11769   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11770     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11771     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11772   }
11773 
11774   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11775   // The LS of a function parameter / return value can only be a power
11776   // of 2, starting from 8 bits, up to 128.
11777   assert(llvm::all_of(Sizes,
11778                       [](unsigned Size) {
11779                         return Size == 8 || Size == 16 || Size == 32 ||
11780                                Size == 64 || Size == 128;
11781                       }) &&
11782          "Invalid size");
11783 
11784   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11785                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11786                          OutputBecomesInput);
11787 }
11788 
11789 /// Mangle the parameter part of the vector function name according to
11790 /// their OpenMP classification. The mangling function is defined in
11791 /// section 3.5 of the AAVFABI.
11792 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11793   SmallString<256> Buffer;
11794   llvm::raw_svector_ostream Out(Buffer);
11795   for (const auto &ParamAttr : ParamAttrs) {
11796     switch (ParamAttr.Kind) {
11797     case LinearWithVarStride:
11798       Out << "ls" << ParamAttr.StrideOrArg;
11799       break;
11800     case Linear:
11801       Out << 'l';
11802       // Don't print the step value if it is not present or if it is
11803       // equal to 1.
11804       if (ParamAttr.StrideOrArg != 1)
11805         Out << ParamAttr.StrideOrArg;
11806       break;
11807     case Uniform:
11808       Out << 'u';
11809       break;
11810     case Vector:
11811       Out << 'v';
11812       break;
11813     }
11814 
11815     if (!!ParamAttr.Alignment)
11816       Out << 'a' << ParamAttr.Alignment;
11817   }
11818 
11819   return std::string(Out.str());
11820 }
11821 
11822 // Function used to add the attribute. The parameter `VLEN` is
11823 // templated to allow the use of "x" when targeting scalable functions
11824 // for SVE.
11825 template <typename T>
11826 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11827                                  char ISA, StringRef ParSeq,
11828                                  StringRef MangledName, bool OutputBecomesInput,
11829                                  llvm::Function *Fn) {
11830   SmallString<256> Buffer;
11831   llvm::raw_svector_ostream Out(Buffer);
11832   Out << Prefix << ISA << LMask << VLEN;
11833   if (OutputBecomesInput)
11834     Out << "v";
11835   Out << ParSeq << "_" << MangledName;
11836   Fn->addFnAttr(Out.str());
11837 }
11838 
11839 // Helper function to generate the Advanced SIMD names depending on
11840 // the value of the NDS when simdlen is not present.
11841 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11842                                       StringRef Prefix, char ISA,
11843                                       StringRef ParSeq, StringRef MangledName,
11844                                       bool OutputBecomesInput,
11845                                       llvm::Function *Fn) {
11846   switch (NDS) {
11847   case 8:
11848     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11849                          OutputBecomesInput, Fn);
11850     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11851                          OutputBecomesInput, Fn);
11852     break;
11853   case 16:
11854     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11855                          OutputBecomesInput, Fn);
11856     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11857                          OutputBecomesInput, Fn);
11858     break;
11859   case 32:
11860     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11861                          OutputBecomesInput, Fn);
11862     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11863                          OutputBecomesInput, Fn);
11864     break;
11865   case 64:
11866   case 128:
11867     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11868                          OutputBecomesInput, Fn);
11869     break;
11870   default:
11871     llvm_unreachable("Scalar type is too wide.");
11872   }
11873 }
11874 
11875 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11876 static void emitAArch64DeclareSimdFunction(
11877     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11878     ArrayRef<ParamAttrTy> ParamAttrs,
11879     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11880     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11881 
11882   // Get basic data for building the vector signature.
11883   const auto Data = getNDSWDS(FD, ParamAttrs);
11884   const unsigned NDS = std::get<0>(Data);
11885   const unsigned WDS = std::get<1>(Data);
11886   const bool OutputBecomesInput = std::get<2>(Data);
11887 
11888   // Check the values provided via `simdlen` by the user.
11889   // 1. A `simdlen(1)` doesn't produce vector signatures,
11890   if (UserVLEN == 1) {
11891     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11892         DiagnosticsEngine::Warning,
11893         "The clause simdlen(1) has no effect when targeting aarch64.");
11894     CGM.getDiags().Report(SLoc, DiagID);
11895     return;
11896   }
11897 
11898   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11899   // Advanced SIMD output.
11900   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11901     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11902         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11903                                     "power of 2 when targeting Advanced SIMD.");
11904     CGM.getDiags().Report(SLoc, DiagID);
11905     return;
11906   }
11907 
11908   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11909   // limits.
11910   if (ISA == 's' && UserVLEN != 0) {
11911     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11912       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11913           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11914                                       "lanes in the architectural constraints "
11915                                       "for SVE (min is 128-bit, max is "
11916                                       "2048-bit, by steps of 128-bit)");
11917       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11918       return;
11919     }
11920   }
11921 
11922   // Sort out parameter sequence.
11923   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11924   StringRef Prefix = "_ZGV";
11925   // Generate simdlen from user input (if any).
11926   if (UserVLEN) {
11927     if (ISA == 's') {
11928       // SVE generates only a masked function.
11929       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11930                            OutputBecomesInput, Fn);
11931     } else {
11932       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11933       // Advanced SIMD generates one or two functions, depending on
11934       // the `[not]inbranch` clause.
11935       switch (State) {
11936       case OMPDeclareSimdDeclAttr::BS_Undefined:
11937         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11938                              OutputBecomesInput, Fn);
11939         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11940                              OutputBecomesInput, Fn);
11941         break;
11942       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11943         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11944                              OutputBecomesInput, Fn);
11945         break;
11946       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11947         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11948                              OutputBecomesInput, Fn);
11949         break;
11950       }
11951     }
11952   } else {
11953     // If no user simdlen is provided, follow the AAVFABI rules for
11954     // generating the vector length.
11955     if (ISA == 's') {
11956       // SVE, section 3.4.1, item 1.
11957       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11958                            OutputBecomesInput, Fn);
11959     } else {
11960       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11961       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11962       // two vector names depending on the use of the clause
11963       // `[not]inbranch`.
11964       switch (State) {
11965       case OMPDeclareSimdDeclAttr::BS_Undefined:
11966         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11967                                   OutputBecomesInput, Fn);
11968         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11969                                   OutputBecomesInput, Fn);
11970         break;
11971       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11972         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11973                                   OutputBecomesInput, Fn);
11974         break;
11975       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11976         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11977                                   OutputBecomesInput, Fn);
11978         break;
11979       }
11980     }
11981   }
11982 }
11983 
11984 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11985                                               llvm::Function *Fn) {
11986   ASTContext &C = CGM.getContext();
11987   FD = FD->getMostRecentDecl();
11988   // Map params to their positions in function decl.
11989   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11990   if (isa<CXXMethodDecl>(FD))
11991     ParamPositions.try_emplace(FD, 0);
11992   unsigned ParamPos = ParamPositions.size();
11993   for (const ParmVarDecl *P : FD->parameters()) {
11994     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11995     ++ParamPos;
11996   }
11997   while (FD) {
11998     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11999       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
12000       // Mark uniform parameters.
12001       for (const Expr *E : Attr->uniforms()) {
12002         E = E->IgnoreParenImpCasts();
12003         unsigned Pos;
12004         if (isa<CXXThisExpr>(E)) {
12005           Pos = ParamPositions[FD];
12006         } else {
12007           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12008                                 ->getCanonicalDecl();
12009           Pos = ParamPositions[PVD];
12010         }
12011         ParamAttrs[Pos].Kind = Uniform;
12012       }
12013       // Get alignment info.
12014       auto *NI = Attr->alignments_begin();
12015       for (const Expr *E : Attr->aligneds()) {
12016         E = E->IgnoreParenImpCasts();
12017         unsigned Pos;
12018         QualType ParmTy;
12019         if (isa<CXXThisExpr>(E)) {
12020           Pos = ParamPositions[FD];
12021           ParmTy = E->getType();
12022         } else {
12023           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12024                                 ->getCanonicalDecl();
12025           Pos = ParamPositions[PVD];
12026           ParmTy = PVD->getType();
12027         }
12028         ParamAttrs[Pos].Alignment =
12029             (*NI)
12030                 ? (*NI)->EvaluateKnownConstInt(C)
12031                 : llvm::APSInt::getUnsigned(
12032                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
12033                           .getQuantity());
12034         ++NI;
12035       }
12036       // Mark linear parameters.
12037       auto *SI = Attr->steps_begin();
12038       auto *MI = Attr->modifiers_begin();
12039       for (const Expr *E : Attr->linears()) {
12040         E = E->IgnoreParenImpCasts();
12041         unsigned Pos;
12042         // Rescaling factor needed to compute the linear parameter
12043         // value in the mangled name.
12044         unsigned PtrRescalingFactor = 1;
12045         if (isa<CXXThisExpr>(E)) {
12046           Pos = ParamPositions[FD];
12047         } else {
12048           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12049                                 ->getCanonicalDecl();
12050           Pos = ParamPositions[PVD];
12051           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12052             PtrRescalingFactor = CGM.getContext()
12053                                      .getTypeSizeInChars(P->getPointeeType())
12054                                      .getQuantity();
12055         }
12056         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12057         ParamAttr.Kind = Linear;
12058         // Assuming a stride of 1, for `linear` without modifiers.
12059         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12060         if (*SI) {
12061           Expr::EvalResult Result;
12062           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12063             if (const auto *DRE =
12064                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12065               if (const auto *StridePVD =
12066                       dyn_cast<ParmVarDecl>(DRE->getDecl())) {
12067                 ParamAttr.Kind = LinearWithVarStride;
12068                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
12069                     ParamPositions[StridePVD->getCanonicalDecl()]);
12070               }
12071             }
12072           } else {
12073             ParamAttr.StrideOrArg = Result.Val.getInt();
12074           }
12075         }
12076         // If we are using a linear clause on a pointer, we need to
12077         // rescale the value of linear_step with the byte size of the
12078         // pointee type.
12079         if (Linear == ParamAttr.Kind)
12080           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12081         ++SI;
12082         ++MI;
12083       }
12084       llvm::APSInt VLENVal;
12085       SourceLocation ExprLoc;
12086       const Expr *VLENExpr = Attr->getSimdlen();
12087       if (VLENExpr) {
12088         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12089         ExprLoc = VLENExpr->getExprLoc();
12090       }
12091       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12092       if (CGM.getTriple().isX86()) {
12093         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12094       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12095         unsigned VLEN = VLENVal.getExtValue();
12096         StringRef MangledName = Fn->getName();
12097         if (CGM.getTarget().hasFeature("sve"))
12098           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12099                                          MangledName, 's', 128, Fn, ExprLoc);
12100         if (CGM.getTarget().hasFeature("neon"))
12101           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12102                                          MangledName, 'n', 128, Fn, ExprLoc);
12103       }
12104     }
12105     FD = FD->getPreviousDecl();
12106   }
12107 }
12108 
12109 namespace {
12110 /// Cleanup action for doacross support.
12111 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12112 public:
12113   static const int DoacrossFinArgs = 2;
12114 
12115 private:
12116   llvm::FunctionCallee RTLFn;
12117   llvm::Value *Args[DoacrossFinArgs];
12118 
12119 public:
12120   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12121                     ArrayRef<llvm::Value *> CallArgs)
12122       : RTLFn(RTLFn) {
12123     assert(CallArgs.size() == DoacrossFinArgs);
12124     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12125   }
12126   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12127     if (!CGF.HaveInsertPoint())
12128       return;
12129     CGF.EmitRuntimeCall(RTLFn, Args);
12130   }
12131 };
12132 } // namespace
12133 
12134 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12135                                        const OMPLoopDirective &D,
12136                                        ArrayRef<Expr *> NumIterations) {
12137   if (!CGF.HaveInsertPoint())
12138     return;
12139 
12140   ASTContext &C = CGM.getContext();
12141   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
12142   RecordDecl *RD;
12143   if (KmpDimTy.isNull()) {
12144     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
12145     //  kmp_int64 lo; // lower
12146     //  kmp_int64 up; // upper
12147     //  kmp_int64 st; // stride
12148     // };
12149     RD = C.buildImplicitRecord("kmp_dim");
12150     RD->startDefinition();
12151     addFieldToRecordDecl(C, RD, Int64Ty);
12152     addFieldToRecordDecl(C, RD, Int64Ty);
12153     addFieldToRecordDecl(C, RD, Int64Ty);
12154     RD->completeDefinition();
12155     KmpDimTy = C.getRecordType(RD);
12156   } else {
12157     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
12158   }
12159   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12160   QualType ArrayTy =
12161       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
12162 
12163   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
12164   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
12165   enum { LowerFD = 0, UpperFD, StrideFD };
12166   // Fill dims with data.
12167   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
12168     LValue DimsLVal = CGF.MakeAddrLValue(
12169         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
12170     // dims.upper = num_iterations;
12171     LValue UpperLVal = CGF.EmitLValueForField(
12172         DimsLVal, *std::next(RD->field_begin(), UpperFD));
12173     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
12174         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
12175         Int64Ty, NumIterations[I]->getExprLoc());
12176     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
12177     // dims.stride = 1;
12178     LValue StrideLVal = CGF.EmitLValueForField(
12179         DimsLVal, *std::next(RD->field_begin(), StrideFD));
12180     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
12181                           StrideLVal);
12182   }
12183 
12184   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
12185   // kmp_int32 num_dims, struct kmp_dim * dims);
12186   llvm::Value *Args[] = {
12187       emitUpdateLocation(CGF, D.getBeginLoc()),
12188       getThreadID(CGF, D.getBeginLoc()),
12189       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
12190       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12191           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
12192           CGM.VoidPtrTy)};
12193 
12194   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12195       CGM.getModule(), OMPRTL___kmpc_doacross_init);
12196   CGF.EmitRuntimeCall(RTLFn, Args);
12197   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
12198       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
12199   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12200       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
12201   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
12202                                              llvm::makeArrayRef(FiniArgs));
12203 }
12204 
12205 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12206                                           const OMPDependClause *C) {
12207   QualType Int64Ty =
12208       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12209   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12210   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12211       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12212   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12213   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12214     const Expr *CounterVal = C->getLoopData(I);
12215     assert(CounterVal);
12216     llvm::Value *CntVal = CGF.EmitScalarConversion(
12217         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12218         CounterVal->getExprLoc());
12219     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12220                           /*Volatile=*/false, Int64Ty);
12221   }
12222   llvm::Value *Args[] = {
12223       emitUpdateLocation(CGF, C->getBeginLoc()),
12224       getThreadID(CGF, C->getBeginLoc()),
12225       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12226   llvm::FunctionCallee RTLFn;
12227   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12228     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12229                                                   OMPRTL___kmpc_doacross_post);
12230   } else {
12231     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12232     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12233                                                   OMPRTL___kmpc_doacross_wait);
12234   }
12235   CGF.EmitRuntimeCall(RTLFn, Args);
12236 }
12237 
12238 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12239                                llvm::FunctionCallee Callee,
12240                                ArrayRef<llvm::Value *> Args) const {
12241   assert(Loc.isValid() && "Outlined function call location must be valid.");
12242   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12243 
12244   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12245     if (Fn->doesNotThrow()) {
12246       CGF.EmitNounwindRuntimeCall(Fn, Args);
12247       return;
12248     }
12249   }
12250   CGF.EmitRuntimeCall(Callee, Args);
12251 }
12252 
12253 void CGOpenMPRuntime::emitOutlinedFunctionCall(
12254     CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
12255     ArrayRef<llvm::Value *> Args) const {
12256   emitCall(CGF, Loc, OutlinedFn, Args);
12257 }
12258 
12259 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12260   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12261     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12262       HasEmittedDeclareTargetRegion = true;
12263 }
12264 
12265 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
12266                                              const VarDecl *NativeParam,
12267                                              const VarDecl *TargetParam) const {
12268   return CGF.GetAddrOfLocalVar(NativeParam);
12269 }
12270 
12271 /// Return allocator value from expression, or return a null allocator (default
12272 /// when no allocator specified).
12273 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12274                                     const Expr *Allocator) {
12275   llvm::Value *AllocVal;
12276   if (Allocator) {
12277     AllocVal = CGF.EmitScalarExpr(Allocator);
12278     // According to the standard, the original allocator type is a enum
12279     // (integer). Convert to pointer type, if required.
12280     AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12281                                         CGF.getContext().VoidPtrTy,
12282                                         Allocator->getExprLoc());
12283   } else {
12284     // If no allocator specified, it defaults to the null allocator.
12285     AllocVal = llvm::Constant::getNullValue(
12286         CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
12287   }
12288   return AllocVal;
12289 }
12290 
12291 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
12292                                                    const VarDecl *VD) {
12293   if (!VD)
12294     return Address::invalid();
12295   Address UntiedAddr = Address::invalid();
12296   Address UntiedRealAddr = Address::invalid();
12297   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12298   if (It != FunctionToUntiedTaskStackMap.end()) {
12299     const UntiedLocalVarsAddressesMap &UntiedData =
12300         UntiedLocalVarsStack[It->second];
12301     auto I = UntiedData.find(VD);
12302     if (I != UntiedData.end()) {
12303       UntiedAddr = I->second.first;
12304       UntiedRealAddr = I->second.second;
12305     }
12306   }
12307   const VarDecl *CVD = VD->getCanonicalDecl();
12308   if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
12309     // Use the default allocation.
12310     if (!isAllocatableDecl(VD))
12311       return UntiedAddr;
12312     llvm::Value *Size;
12313     CharUnits Align = CGM.getContext().getDeclAlign(CVD);
12314     if (CVD->getType()->isVariablyModifiedType()) {
12315       Size = CGF.getTypeSize(CVD->getType());
12316       // Align the size: ((size + align - 1) / align) * align
12317       Size = CGF.Builder.CreateNUWAdd(
12318           Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
12319       Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
12320       Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
12321     } else {
12322       CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
12323       Size = CGM.getSize(Sz.alignTo(Align));
12324     }
12325     llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
12326     const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
12327     const Expr *Allocator = AA->getAllocator();
12328     llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
12329     llvm::Value *Alignment =
12330         AA->getAlignment()
12331             ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
12332                                         CGM.SizeTy, /*isSigned=*/false)
12333             : nullptr;
12334     SmallVector<llvm::Value *, 4> Args;
12335     Args.push_back(ThreadID);
12336     if (Alignment)
12337       Args.push_back(Alignment);
12338     Args.push_back(Size);
12339     Args.push_back(AllocVal);
12340     llvm::omp::RuntimeFunction FnID =
12341         Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
12342     llvm::Value *Addr = CGF.EmitRuntimeCall(
12343         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
12344         getName({CVD->getName(), ".void.addr"}));
12345     llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12346         CGM.getModule(), OMPRTL___kmpc_free);
12347     QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12348     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12349         Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12350     if (UntiedAddr.isValid())
12351       CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12352 
12353     // Cleanup action for allocate support.
12354     class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12355       llvm::FunctionCallee RTLFn;
12356       SourceLocation::UIntTy LocEncoding;
12357       Address Addr;
12358       const Expr *AllocExpr;
12359 
12360     public:
12361       OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12362                            SourceLocation::UIntTy LocEncoding, Address Addr,
12363                            const Expr *AllocExpr)
12364           : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12365             AllocExpr(AllocExpr) {}
12366       void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12367         if (!CGF.HaveInsertPoint())
12368           return;
12369         llvm::Value *Args[3];
12370         Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12371             CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12372         Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12373             Addr.getPointer(), CGF.VoidPtrTy);
12374         llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
12375         Args[2] = AllocVal;
12376         CGF.EmitRuntimeCall(RTLFn, Args);
12377       }
12378     };
12379     Address VDAddr = UntiedRealAddr.isValid()
12380                          ? UntiedRealAddr
12381                          : Address::deprecated(Addr, Align);
12382     CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12383         NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12384         VDAddr, Allocator);
12385     if (UntiedRealAddr.isValid())
12386       if (auto *Region =
12387               dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12388         Region->emitUntiedSwitch(CGF);
12389     return VDAddr;
12390   }
12391   return UntiedAddr;
12392 }
12393 
12394 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12395                                              const VarDecl *VD) const {
12396   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12397   if (It == FunctionToUntiedTaskStackMap.end())
12398     return false;
12399   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12400 }
12401 
12402 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12403     CodeGenModule &CGM, const OMPLoopDirective &S)
12404     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12405   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12406   if (!NeedToPush)
12407     return;
12408   NontemporalDeclsSet &DS =
12409       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12410   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12411     for (const Stmt *Ref : C->private_refs()) {
12412       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12413       const ValueDecl *VD;
12414       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12415         VD = DRE->getDecl();
12416       } else {
12417         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12418         assert((ME->isImplicitCXXThis() ||
12419                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12420                "Expected member of current class.");
12421         VD = ME->getMemberDecl();
12422       }
12423       DS.insert(VD);
12424     }
12425   }
12426 }
12427 
12428 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12429   if (!NeedToPush)
12430     return;
12431   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12432 }
12433 
12434 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12435     CodeGenFunction &CGF,
12436     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12437                           std::pair<Address, Address>> &LocalVars)
12438     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12439   if (!NeedToPush)
12440     return;
12441   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12442       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12443   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12444 }
12445 
12446 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12447   if (!NeedToPush)
12448     return;
12449   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12450 }
12451 
12452 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12453   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12454 
12455   return llvm::any_of(
12456       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12457       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12458 }
12459 
12460 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12461     const OMPExecutableDirective &S,
12462     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12463     const {
12464   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12465   // Vars in target/task regions must be excluded completely.
12466   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12467       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12468     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12469     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12470     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12471     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12472       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12473         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12474     }
12475   }
12476   // Exclude vars in private clauses.
12477   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12478     for (const Expr *Ref : C->varlists()) {
12479       if (!Ref->getType()->isScalarType())
12480         continue;
12481       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12482       if (!DRE)
12483         continue;
12484       NeedToCheckForLPCs.insert(DRE->getDecl());
12485     }
12486   }
12487   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12488     for (const Expr *Ref : C->varlists()) {
12489       if (!Ref->getType()->isScalarType())
12490         continue;
12491       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12492       if (!DRE)
12493         continue;
12494       NeedToCheckForLPCs.insert(DRE->getDecl());
12495     }
12496   }
12497   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12498     for (const Expr *Ref : C->varlists()) {
12499       if (!Ref->getType()->isScalarType())
12500         continue;
12501       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12502       if (!DRE)
12503         continue;
12504       NeedToCheckForLPCs.insert(DRE->getDecl());
12505     }
12506   }
12507   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12508     for (const Expr *Ref : C->varlists()) {
12509       if (!Ref->getType()->isScalarType())
12510         continue;
12511       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12512       if (!DRE)
12513         continue;
12514       NeedToCheckForLPCs.insert(DRE->getDecl());
12515     }
12516   }
12517   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12518     for (const Expr *Ref : C->varlists()) {
12519       if (!Ref->getType()->isScalarType())
12520         continue;
12521       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12522       if (!DRE)
12523         continue;
12524       NeedToCheckForLPCs.insert(DRE->getDecl());
12525     }
12526   }
12527   for (const Decl *VD : NeedToCheckForLPCs) {
12528     for (const LastprivateConditionalData &Data :
12529          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12530       if (Data.DeclToUniqueName.count(VD) > 0) {
12531         if (!Data.Disabled)
12532           NeedToAddForLPCsAsDisabled.insert(VD);
12533         break;
12534       }
12535     }
12536   }
12537 }
12538 
12539 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12540     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12541     : CGM(CGF.CGM),
12542       Action((CGM.getLangOpts().OpenMP >= 50 &&
12543               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12544                            [](const OMPLastprivateClause *C) {
12545                              return C->getKind() ==
12546                                     OMPC_LASTPRIVATE_conditional;
12547                            }))
12548                  ? ActionToDo::PushAsLastprivateConditional
12549                  : ActionToDo::DoNotPush) {
12550   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12551   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12552     return;
12553   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12554          "Expected a push action.");
12555   LastprivateConditionalData &Data =
12556       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12557   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12558     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12559       continue;
12560 
12561     for (const Expr *Ref : C->varlists()) {
12562       Data.DeclToUniqueName.insert(std::make_pair(
12563           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12564           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12565     }
12566   }
12567   Data.IVLVal = IVLVal;
12568   Data.Fn = CGF.CurFn;
12569 }
12570 
12571 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12572     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12573     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12574   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12575   if (CGM.getLangOpts().OpenMP < 50)
12576     return;
12577   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12578   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12579   if (!NeedToAddForLPCsAsDisabled.empty()) {
12580     Action = ActionToDo::DisableLastprivateConditional;
12581     LastprivateConditionalData &Data =
12582         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12583     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12584       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12585     Data.Fn = CGF.CurFn;
12586     Data.Disabled = true;
12587   }
12588 }
12589 
12590 CGOpenMPRuntime::LastprivateConditionalRAII
12591 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12592     CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12593   return LastprivateConditionalRAII(CGF, S);
12594 }
12595 
12596 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12597   if (CGM.getLangOpts().OpenMP < 50)
12598     return;
12599   if (Action == ActionToDo::DisableLastprivateConditional) {
12600     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12601            "Expected list of disabled private vars.");
12602     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12603   }
12604   if (Action == ActionToDo::PushAsLastprivateConditional) {
12605     assert(
12606         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12607         "Expected list of lastprivate conditional vars.");
12608     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12609   }
12610 }
12611 
12612 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12613                                                         const VarDecl *VD) {
12614   ASTContext &C = CGM.getContext();
12615   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12616   if (I == LastprivateConditionalToTypes.end())
12617     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12618   QualType NewType;
12619   const FieldDecl *VDField;
12620   const FieldDecl *FiredField;
12621   LValue BaseLVal;
12622   auto VI = I->getSecond().find(VD);
12623   if (VI == I->getSecond().end()) {
12624     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12625     RD->startDefinition();
12626     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12627     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12628     RD->completeDefinition();
12629     NewType = C.getRecordType(RD);
12630     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12631     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12632     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12633   } else {
12634     NewType = std::get<0>(VI->getSecond());
12635     VDField = std::get<1>(VI->getSecond());
12636     FiredField = std::get<2>(VI->getSecond());
12637     BaseLVal = std::get<3>(VI->getSecond());
12638   }
12639   LValue FiredLVal =
12640       CGF.EmitLValueForField(BaseLVal, FiredField);
12641   CGF.EmitStoreOfScalar(
12642       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12643       FiredLVal);
12644   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12645 }
12646 
12647 namespace {
12648 /// Checks if the lastprivate conditional variable is referenced in LHS.
12649 class LastprivateConditionalRefChecker final
12650     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12651   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12652   const Expr *FoundE = nullptr;
12653   const Decl *FoundD = nullptr;
12654   StringRef UniqueDeclName;
12655   LValue IVLVal;
12656   llvm::Function *FoundFn = nullptr;
12657   SourceLocation Loc;
12658 
12659 public:
12660   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12661     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12662          llvm::reverse(LPM)) {
12663       auto It = D.DeclToUniqueName.find(E->getDecl());
12664       if (It == D.DeclToUniqueName.end())
12665         continue;
12666       if (D.Disabled)
12667         return false;
12668       FoundE = E;
12669       FoundD = E->getDecl()->getCanonicalDecl();
12670       UniqueDeclName = It->second;
12671       IVLVal = D.IVLVal;
12672       FoundFn = D.Fn;
12673       break;
12674     }
12675     return FoundE == E;
12676   }
12677   bool VisitMemberExpr(const MemberExpr *E) {
12678     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12679       return false;
12680     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12681          llvm::reverse(LPM)) {
12682       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12683       if (It == D.DeclToUniqueName.end())
12684         continue;
12685       if (D.Disabled)
12686         return false;
12687       FoundE = E;
12688       FoundD = E->getMemberDecl()->getCanonicalDecl();
12689       UniqueDeclName = It->second;
12690       IVLVal = D.IVLVal;
12691       FoundFn = D.Fn;
12692       break;
12693     }
12694     return FoundE == E;
12695   }
12696   bool VisitStmt(const Stmt *S) {
12697     for (const Stmt *Child : S->children()) {
12698       if (!Child)
12699         continue;
12700       if (const auto *E = dyn_cast<Expr>(Child))
12701         if (!E->isGLValue())
12702           continue;
12703       if (Visit(Child))
12704         return true;
12705     }
12706     return false;
12707   }
  /// Creates a checker over the regions in \p LPM. The argument is stored in
  /// the member of the same name; no lookup is performed here.
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
12711   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12712   getFoundData() const {
12713     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12714   }
12715 };
12716 } // namespace
12717 
12718 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12719                                                        LValue IVLVal,
12720                                                        StringRef UniqueDeclName,
12721                                                        LValue LVal,
12722                                                        SourceLocation Loc) {
12723   // Last updated loop counter for the lastprivate conditional var.
12724   // int<xx> last_iv = 0;
12725   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12726   llvm::Constant *LastIV =
12727       getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
12728   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12729       IVLVal.getAlignment().getAsAlign());
12730   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12731 
12732   // Last value of the lastprivate conditional.
12733   // decltype(priv_a) last_a;
12734   llvm::GlobalVariable *Last = getOrCreateInternalVariable(
12735       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12736   Last->setAlignment(LVal.getAlignment().getAsAlign());
12737   LValue LastLVal = CGF.MakeAddrLValue(
12738       Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());
12739 
12740   // Global loop counter. Required to handle inner parallel-for regions.
12741   // iv
12742   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12743 
12744   // #pragma omp critical(a)
12745   // if (last_iv <= iv) {
12746   //   last_iv = iv;
12747   //   last_a = priv_a;
12748   // }
12749   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12750                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12751     Action.Enter(CGF);
12752     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12753     // (last_iv <= iv) ? Check if the variable is updated and store new
12754     // value in global var.
12755     llvm::Value *CmpRes;
12756     if (IVLVal.getType()->isSignedIntegerType()) {
12757       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12758     } else {
12759       assert(IVLVal.getType()->isUnsignedIntegerType() &&
12760              "Loop iteration variable must be integer.");
12761       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12762     }
12763     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12764     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12765     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12766     // {
12767     CGF.EmitBlock(ThenBB);
12768 
12769     //   last_iv = iv;
12770     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12771 
12772     //   last_a = priv_a;
12773     switch (CGF.getEvaluationKind(LVal.getType())) {
12774     case TEK_Scalar: {
12775       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12776       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12777       break;
12778     }
12779     case TEK_Complex: {
12780       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12781       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12782       break;
12783     }
12784     case TEK_Aggregate:
12785       llvm_unreachable(
12786           "Aggregates are not supported in lastprivate conditional.");
12787     }
12788     // }
12789     CGF.EmitBranch(ExitBB);
12790     // There is no need to emit line number for unconditional branch.
12791     (void)ApplyDebugLocation::CreateEmpty(CGF);
12792     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12793   };
12794 
12795   if (CGM.getLangOpts().OpenMPSimd) {
12796     // Do not emit as a critical region as no parallel region could be emitted.
12797     RegionCodeGenTy ThenRCG(CodeGen);
12798     ThenRCG(CGF);
12799   } else {
12800     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12801   }
12802 }
12803 
12804 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12805                                                          const Expr *LHS) {
12806   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12807     return;
12808   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12809   if (!Checker.Visit(LHS))
12810     return;
12811   const Expr *FoundE;
12812   const Decl *FoundD;
12813   StringRef UniqueDeclName;
12814   LValue IVLVal;
12815   llvm::Function *FoundFn;
12816   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12817       Checker.getFoundData();
12818   if (FoundFn != CGF.CurFn) {
12819     // Special codegen for inner parallel regions.
12820     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12821     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12822     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12823            "Lastprivate conditional is not found in outer region.");
12824     QualType StructTy = std::get<0>(It->getSecond());
12825     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12826     LValue PrivLVal = CGF.EmitLValue(FoundE);
12827     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12828         PrivLVal.getAddress(CGF),
12829         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12830     LValue BaseLVal =
12831         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12832     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12833     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12834                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12835                         FiredLVal, llvm::AtomicOrdering::Unordered,
12836                         /*IsVolatile=*/true, /*isInit=*/false);
12837     return;
12838   }
12839 
12840   // Private address of the lastprivate conditional in the current context.
12841   // priv_a
12842   LValue LVal = CGF.EmitLValue(FoundE);
12843   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12844                                    FoundE->getExprLoc());
12845 }
12846 
12847 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12848     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12849     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12850   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12851     return;
12852   auto Range = llvm::reverse(LastprivateConditionalStack);
12853   auto It = llvm::find_if(
12854       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12855   if (It == Range.end() || It->Fn != CGF.CurFn)
12856     return;
12857   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12858   assert(LPCI != LastprivateConditionalToTypes.end() &&
12859          "Lastprivates must be registered already.");
12860   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12861   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12862   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12863   for (const auto &Pair : It->DeclToUniqueName) {
12864     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12865     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12866       continue;
12867     auto I = LPCI->getSecond().find(Pair.first);
12868     assert(I != LPCI->getSecond().end() &&
12869            "Lastprivate must be rehistered already.");
12870     // bool Cmp = priv_a.Fired != 0;
12871     LValue BaseLVal = std::get<3>(I->getSecond());
12872     LValue FiredLVal =
12873         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12874     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12875     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12876     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12877     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12878     // if (Cmp) {
12879     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12880     CGF.EmitBlock(ThenBB);
12881     Address Addr = CGF.GetAddrOfLocalVar(VD);
12882     LValue LVal;
12883     if (VD->getType()->isReferenceType())
12884       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12885                                            AlignmentSource::Decl);
12886     else
12887       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12888                                 AlignmentSource::Decl);
12889     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12890                                      D.getBeginLoc());
12891     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12892     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12893     // }
12894   }
12895 }
12896 
12897 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12898     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12899     SourceLocation Loc) {
12900   if (CGF.getLangOpts().OpenMP < 50)
12901     return;
12902   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12903   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12904          "Unknown lastprivate conditional variable.");
12905   StringRef UniqueName = It->second;
12906   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12907   // The variable was not updated in the region - exit.
12908   if (!GV)
12909     return;
12910   LValue LPLVal = CGF.MakeAddrLValue(
12911       Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
12912       PrivLVal.getType().getNonReferenceType());
12913   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12914   CGF.EmitStoreOfScalar(Res, PrivLVal);
12915 }
12916 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so
/// no outlined function is ever returned and all parameters are ignored.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12922 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so
/// no outlined function is ever returned and all parameters are ignored.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12928 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so
/// no outlined function is returned and \p NumberOfParts is never written.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12936 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12945 
/// Not supported in SIMD-only mode: \p CriticalOpGen is never invoked, the
/// body is a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12952 
/// Not supported in SIMD-only mode: \p MasterOpGen is never invoked, the body
/// is a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12958 
/// Not supported in SIMD-only mode: \p MasterOpGen is never invoked and
/// \p Filter is ignored, the body is a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12965 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12970 
/// Not supported in SIMD-only mode: \p TaskgroupOpGen is never invoked, the
/// body is a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12976 
/// Not supported in SIMD-only mode: \p SingleOpGen is never invoked and the
/// expression lists are ignored, the body is a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12984 
/// Not supported in SIMD-only mode: \p OrderedOpGen is never invoked, the
/// body is a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12991 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12999 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13006 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13012 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13018 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13025 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13031 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so
/// no value is ever returned and all parameters are ignored.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13039 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and \p NumThreads is ignored.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13045 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and \p ProcBind is ignored.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13051 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so
/// no address is ever returned and all parameters are ignored.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13058 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so
/// no function is ever returned and all parameters are ignored.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13064 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so
/// no address is ever returned and all parameters are ignored.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13069 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13076 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13085 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13092 
/// Emits a reduction by forwarding all arguments unchanged to
/// CGOpenMPRuntime::emitReduction. Callers are expected to request a simple
/// reduction: \p Options.SimpleReduction is asserted to be set.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  // Qualified call: use the CGOpenMPRuntime implementation explicitly.
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
13101 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so
/// no value is ever returned and all parameters are ignored.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13107 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13113 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and \p RCG is left untouched.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13120 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so
/// no address is ever returned and all parameters are ignored.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13127 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and \p Data is ignored.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13133 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and \p CancelRegion is ignored.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13139 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13145 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so
/// the output parameters \p OutlinedFn and \p OutlinedFnID are never written.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13152 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and \p SizeEmitter is never invoked.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13162 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so
/// no result is ever returned and \p GD is ignored.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13166 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so
/// no result is ever returned and \p GD is ignored.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13170 
/// Always returns false without inspecting \p GD. Unlike the neighbouring
/// SIMD-only members this one is callable: it does not hit llvm_unreachable.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
13174 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13182 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and both clause expressions are ignored.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13189 
/// Not supported in SIMD-only mode: \p CodeGen is never invoked and \p Info
/// is left untouched, the body is a bare llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13195 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13201 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and \p NumIterations is ignored.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13207 
/// Not supported in SIMD-only mode: emits nothing, the body is a bare
/// llvm_unreachable and the clause \p C is ignored.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13212 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so
/// no declaration is ever returned and both parameters are ignored.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13218 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so
/// no address is ever returned and all parameters are ignored.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13225