1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/Attr.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/OpenMPClause.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/BitmaskEnum.h"
26 #include "clang/Basic/FileManager.h"
27 #include "clang/Basic/OpenMPKinds.h"
28 #include "clang/Basic/SourceManager.h"
29 #include "clang/CodeGen/ConstantInitBuilder.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SetOperations.h"
32 #include "llvm/ADT/SmallBitVector.h"
33 #include "llvm/ADT/StringExtras.h"
34 #include "llvm/Bitcode/BitcodeReader.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/GlobalValue.h"
38 #include "llvm/IR/Value.h"
39 #include "llvm/Support/AtomicOrdering.h"
40 #include "llvm/Support/Format.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include <cassert>
43 #include <numeric>
44 
45 using namespace clang;
46 using namespace CodeGen;
47 using namespace llvm::omp;
48 
49 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions backed by a captured statement.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions without a captured statement of their own
  /// (used by CGOpenMPInlinedRegionInfo, which delegates to an enclosing
  /// region instead).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a switching point for untied tasks. No-op by default; overridden
  /// by the task-region subclass.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Get the specific kind of this codegen region.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Get the OpenMP directive that produced this region.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Return the cancellation flag passed at construction (see HasCancel).
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: every OpenMP region info is tagged CR_OpenMP.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Specific kind of this region (parallel/task/inlined/target).
  CGOpenMPRegionKind RegionKind;
  /// Code generation sequence for the body of the region.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive kind that produced this region.
  OpenMPDirectiveKind Kind;
  /// Cancellation flag supplied by the creator of the region.
  bool HasCancel;
};
111 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable holding the global thread id; must be
  /// non-null (asserted below).
  /// \param HelperName Name used for the outlined helper function.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Helper-function name. NOTE(review): stored as a StringRef, so the
  /// caller must keep the underlying string alive for this object's lifetime.
  StringRef HelperName;
};
144 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that splits the body of an untied task into parts.
  /// A part id, stored through \a PartIDVar, is switched on at task entry
  /// (see Enter()) to resume execution at the correct part.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True for untied tasks (note: the constructor parameter is 'Tied').
    bool Untied;
    /// Variable through whose pointee the current part id is loaded/stored.
    const VarDecl *PartIDVar;
    /// Codegen sequence run at each switching point, before leaving the part.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch emitted at task entry; a new case is appended per part.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Default destination: fall through to the function return.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes at the very start of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one switching point: store the id of the next part, run
    /// \a UntiedCodeGen, branch out through the return block, and register
    /// the resume block as a new case of the entry switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The next part reuses the next unused case number as its id.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of parts the task body has been split into so far.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Delegate to the untied-task action shared with the task emitter.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks. NOTE(review): held by
  /// reference — the action must outlive this region info.
  const UntiedTaskActionTy &Action;
};
233 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are forwarded to the enclosing OpenMP region
/// info, if there is one.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI Captured-statement info that was active before this
  /// inlined region; restored by InlinedOpenMPRegionRAII.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        // Non-null only when the previous info is itself an OpenMP region.
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  /// NOTE(review): unlike the other forwarders this consults getOldCSI()
  /// directly, so it also works when the enclosing captured-statement info
  /// is not an OpenMP region (OuterRegionInfo == nullptr).
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Captured-statement info that was active before this region was entered.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
316 
/// API for captured statement code generation in OpenMP target
/// constructs. For these regions, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param HelperName Unique name for this target region, supplied by the
  /// caller (see class comment).
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique helper name; referenced storage must outlive this object.
  StringRef HelperName;
};
345 
/// Placeholder codegen callback for regions that must never emit a body
/// (used by CGOpenMPInnerExprInfo below).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      // Only non-local variables need privatization here.
      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      // Build a reference to the captured variable and privatize it at the
      // address that reference evaluates to.
      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  /// NOTE(review): behaves the same as the inherited implementation (forward
  /// to the outer region, otherwise null); kept explicit here.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
408 
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda/block capture state of CGF, restored by the destructor
  /// when NoInheritance is set.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  /// If true, the inlined region must not inherit the enclosing lambda/block
  /// capture mappings; they are stashed here for the region's lifetime.
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Move CGF's lambda/block capture state aside so the inlined region
      // starts with a clean slate.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      // Undo the stashing performed in the constructor.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
451 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  /// (Same value as OMP_IDENT_BARRIER_IMPL, mirroring kmp.h.)
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
480 
481 namespace {
482 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
/// Combined into a single mask that is passed to the offloading runtime.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};
499 
/// Reserved device IDs for OpenMP offloading.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
505 } // anonymous namespace
506 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
/// Field indexes of the ident_t structure described above.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
547 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h). For the basic schedules, each 'ordered'
/// enumerator equals its unordered counterpart plus 32
/// (OMP_ord_lower - OMP_sch_lower).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
579 
580 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
581 /// region.
582 class CleanupTy final : public EHScopeStack::Cleanup {
583   PrePostActionTy *Action;
584 
585 public:
586   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
587   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
588     if (!CGF.HaveInsertPoint())
589       return;
590     Action->Exit(CGF);
591   }
592 };
593 
594 } // anonymous namespace
595 
596 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
597   CodeGenFunction::RunCleanupsScope Scope(CGF);
598   if (PrePostAction) {
599     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
600     Callback(CodeGen, CGF, *PrePostAction);
601   } else {
602     PrePostActionTy Action;
603     Callback(CodeGen, CGF, Action);
604   }
605 }
606 
607 /// Check if the combiner is a call to UDR combiner and if it is so return the
608 /// UDR decl used for reduction.
609 static const OMPDeclareReductionDecl *
610 getReductionInit(const Expr *ReductionOp) {
611   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
612     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
613       if (const auto *DRE =
614               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
615         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
616           return DRD;
617   return nullptr;
618 }
619 
/// Emit initialization of \a Private using the initializer of the
/// user-defined reduction \a DRD if it has one, and a null constant of type
/// \a Ty otherwise.
/// \param DRD User-defined reduction declaration; must be non-null.
/// \param InitOp Initializer call expression; its two arguments are remapped
/// to \a Private and \a Original respectively.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original (shared) item.
/// \param Ty Type of the reduction item.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Emit a call to the user-provided initializer function.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Map the call's argument variables onto the private/original addresses
    // before emitting it.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Substitute the second function of the UDR pair as the callee
    // (presumably the initializer — confirm against getUserDefinedReduction).
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No user initializer: materialize a private constant global holding the
    // null value of Ty and copy it into the private location.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied straight from the global; no rvalue load.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
675 
/// Emit element-by-element initialization of an array.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// emitInitWithReductionInitializer() with \a Init as the initializer call.
/// \param Init Initial expression of array.
/// \param DRD User-defined reduction declaration; may be null. When non-null,
/// \a SrcAddr must be valid.
/// \param SrcAddr Address of the original array (only used when \a DRD is
/// non-null).
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source (UDR only) and destination elements.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Cleanups from the per-element init must not escape the iteration.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name says "dest" but this advances the
    // source pointer; cosmetic only, the GEP itself is correct.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
765 
766 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
767   return CGF.EmitOMPSharedLValue(E);
768 }
769 
770 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
771                                             const Expr *E) {
772   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
773     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
774   return LValue();
775 }
776 
777 void ReductionCodeGen::emitAggregateInitialization(
778     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
779     const OMPDeclareReductionDecl *DRD) {
780   // Emit VarDecl with copy init for arrays.
781   // Get the address of the original variable captured in current
782   // captured region.
783   const auto *PrivateVD =
784       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
785   bool EmitDeclareReductionInit =
786       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
787   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
788                        EmitDeclareReductionInit,
789                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
790                                                 : PrivateVD->getInit(),
791                        DRD, SharedAddr);
792 }
793 
794 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
795                                    ArrayRef<const Expr *> Origs,
796                                    ArrayRef<const Expr *> Privates,
797                                    ArrayRef<const Expr *> ReductionOps) {
798   ClausesData.reserve(Shareds.size());
799   SharedAddresses.reserve(Shareds.size());
800   Sizes.reserve(Shareds.size());
801   BaseDecls.reserve(Shareds.size());
802   const auto *IOrig = Origs.begin();
803   const auto *IPriv = Privates.begin();
804   const auto *IRed = ReductionOps.begin();
805   for (const Expr *Ref : Shareds) {
806     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
807     std::advance(IOrig, 1);
808     std::advance(IPriv, 1);
809     std::advance(IRed, 1);
810   }
811 }
812 
813 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
814   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
815          "Number of generated lvalues must be exactly N.");
816   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
817   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
818   SharedAddresses.emplace_back(First, Second);
819   if (ClausesData[N].Shared == ClausesData[N].Ref) {
820     OrigAddresses.emplace_back(First, Second);
821   } else {
822     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
823     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
824     OrigAddresses.emplace_back(First, Second);
825   }
826 }
827 
// Computes the size of reduction item \p N and records it in Sizes.
// For constant-sized items only the byte size is recorded; for variably
// modified types both the byte size and the element count are computed as IR
// values, and the VLA size expression is bound so the private copy's type can
// be emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Statically sized item: the byte size is known from the type and no
    // element count needs to be tracked (second member stays null).
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1, from the section's bound lvalues; byte
    // size follows by multiplying with sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variably-modified item: byte size comes from the type; derive the
    // element count by dividing (exact by construction).
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Map the VLA size expression to the computed element count while the
  // variably modified private type is emitted below.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
863 
// Variant of emitAggregateType for when the element count \p Size has already
// been computed elsewhere: binds it to the VLA size expression and emits the
// variably modified private type. For non-variably-modified items \p Size
// must be null and nothing is emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the provided element count to the VLA size expression for the
  // duration of the type emission (OpaqueValueMapping is scoped RAII).
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
882 
// Emits the initializer for the private copy of reduction item \p N.
//
// Dispatches on the shape of the item:
//  * array types go through element-wise aggregate initialization;
//  * declare-reduction items whose UDR has an initializer (or whose private
//    copy has no default init) use the UDR initializer;
//  * otherwise the private declaration's own non-trivial initializer is
//    emitted, unless \p DefaultInit reports it already handled the init.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Reinterpret the raw private storage as the private copy's type.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // NOTE: DefaultInit is invoked for its side effects before the
    // element-wise initialization when a UDR initializer exists.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
910 
911 bool ReductionCodeGen::needCleanups(unsigned N) {
912   const auto *PrivateVD =
913       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
914   QualType PrivateType = PrivateVD->getType();
915   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
916   return DTorKind != QualType::DK_none;
917 }
918 
919 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
920                                     Address PrivateAddr) {
921   const auto *PrivateVD =
922       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
923   QualType PrivateType = PrivateVD->getType();
924   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
925   if (needCleanups(N)) {
926     PrivateAddr = CGF.Builder.CreateElementBitCast(
927         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
928     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
929   }
930 }
931 
// Follows the pointer/reference indirections in \p BaseTy (loading through
// \p BaseLV at each step) until the type matches \p ElTy, then returns an
// lvalue of element type \p ElTy at the resulting address, preserving the
// base lvalue's base/TBAA info.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference level: materialize it as an lvalue and load through it.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Reinterpret the final address as the element type.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
951 
// Rebuilds the indirection chain of \p BaseTy around the raw pointer \p Addr:
// for every pointer/reference level between \p BaseTy and \p ElTy a stack
// temporary is created and the temporaries are linked by stores, so that
// loading through the returned address dereferences the same number of
// levels as the original base variable. With no indirection involved, the
// pointer is simply cast to \p BaseLVType.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Tmp: innermost temporary created so far; TopTmp: the one to link the next
  // temporary into; MostTopTmp: the outermost temporary, which is returned.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted pointer into the innermost temporary; callers load
    // through the chain starting at the outermost one.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address::deprecated(Addr, BaseLVAlignment);
}
979 
980 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
981   const VarDecl *OrigVD = nullptr;
982   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
983     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
984     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
985       Base = TempOASE->getBase()->IgnoreParenImpCasts();
986     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
987       Base = TempASE->getBase()->IgnoreParenImpCasts();
988     DE = cast<DeclRefExpr>(Base);
989     OrigVD = cast<VarDecl>(DE->getDecl());
990   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
991     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
992     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
993       Base = TempASE->getBase()->IgnoreParenImpCasts();
994     DE = cast<DeclRefExpr>(Base);
995     OrigVD = cast<VarDecl>(DE->getDecl());
996   }
997   return OrigVD;
998 }
999 
// Adjusts \p PrivateAddr for reductions declared over an array section or
// array subscript: generated code refers to the base variable while the
// private buffer only covers the section, so the private pointer is shifted
// by the distance between the base lvalue and the section start and then
// re-wrapped (castToBase) to match the base variable's indirections. Also
// records the base declaration in BaseDecls. Other reduction items are
// returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    // Dereference down to the element type of the shared item.
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Signed distance (in elements) from the section start back to the base.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1027 
1028 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1029   const OMPDeclareReductionDecl *DRD =
1030       getReductionInit(ClausesData[N].ReductionOp);
1031   return DRD && DRD->getInitializer();
1032 }
1033 
1034 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1035   return CGF.EmitLoadOfPointerLValue(
1036       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1037       getThreadIDVariable()->getType()->castAs<PointerType>());
1038 }
1039 
/// Emits the body of the OpenMP region inside a terminate scope, enforcing
/// the structured-block single-entry/single-exit rules quoted below.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // Pushing a terminate scope turns any exception escaping the region body
  // into a program termination rather than an exceptional exit.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1054 
1055 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1056     CodeGenFunction &CGF) {
1057   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1058                             getThreadIDVariable()->getType(),
1059                             AlignmentSource::Decl);
1060 }
1061 
1062 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1063                                        QualType FieldTy) {
1064   auto *Field = FieldDecl::Create(
1065       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1066       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1067       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1068   Field->setAccess(AS_public);
1069   DC->addDecl(Field);
1070   return Field;
1071 }
1072 
// Constructs the OpenMP runtime helper. \p FirstSeparator and \p Separator
// configure how getName() joins name parts. Also creates the
// kmp_critical_name type (an array of 8 i32), initializes the shared
// OpenMPIRBuilder and loads any previously recorded offload metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1083 
1084 void CGOpenMPRuntime::clear() {
1085   InternalVars.clear();
1086   // Clean non-target variable declarations possibly used only in debug info.
1087   for (const auto &Data : EmittedNonTargetVariables) {
1088     if (!Data.getValue().pointsToAliveValue())
1089       continue;
1090     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1091     if (!GV)
1092       continue;
1093     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1094       continue;
1095     GV->eraseFromParent();
1096   }
1097 }
1098 
1099 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1100   SmallString<128> Buffer;
1101   llvm::raw_svector_ostream OS(Buffer);
1102   StringRef Sep = FirstSeparator;
1103   for (StringRef Part : Parts) {
1104     OS << Sep << Part;
1105     Sep = Separator;
1106   }
1107   return std::string(OS.str());
1108 }
1109 
/// Emits the internal helper function implementing either the combiner or
/// the initializer of a 'declare reduction' construct. \p In and \p Out are
/// the construct's omp_in/omp_out (or omp_orig/omp_priv) variables; inside
/// the helper they are mapped onto the dereferenced pointer parameters.
/// \p CombinerInitializer is the expression to emit; it may be null for an
/// initializer that only default-initializes 'omp_priv'.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are small; make sure the optimizer may inline them.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For an initializer without an explicit init expression, emit the default
  // initializer of the 'omp_priv' variable (if it is non-trivial).
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1166 
/// Emits (once) the combiner and, if present, the initializer functions for
/// the user-defined reduction \p D, caching the pair in UDRMap. When \p CGF
/// is provided, the declaration is also recorded against the current
/// function in FunctionUDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Already emitted for this declaration.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only a call-style init has an expression to emit; otherwise the
    // default initializer of 'omp_priv' is used inside the helper.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1192 
1193 std::pair<llvm::Function *, llvm::Function *>
1194 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1195   auto I = UDRMap.find(D);
1196   if (I != UDRMap.end())
1197     return I->second;
1198   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1199   return UDRMap.lookup(D);
1200 }
1201 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  // Pushes a finalization callback for a region of directive kind \p Kind
  // onto the builder's stack; a no-op when \p OMPBuilder is null.
  // NOTE: the callback captures \p CGF by reference, so this RAII object
  // must not outlive code generation for the region (the destructor pops
  // the callback again).
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  // Pops the callback pushed by the constructor, if one was pushed.
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Null when no OpenMPIRBuilder is in use; then both ctor and dtor no-op.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1246 
1247 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1248     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1249     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1250     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1251   assert(ThreadIDVar->getType()->isPointerType() &&
1252          "thread id variable must be of type kmp_int32 *");
1253   CodeGenFunction CGF(CGM, true);
1254   bool HasCancel = false;
1255   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1256     HasCancel = OPD->hasCancel();
1257   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1258     HasCancel = OPD->hasCancel();
1259   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1260     HasCancel = OPSD->hasCancel();
1261   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1262     HasCancel = OPFD->hasCancel();
1263   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1264     HasCancel = OPFD->hasCancel();
1265   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1266     HasCancel = OPFD->hasCancel();
1267   else if (const auto *OPFD =
1268                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1269     HasCancel = OPFD->hasCancel();
1270   else if (const auto *OPFD =
1271                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1272     HasCancel = OPFD->hasCancel();
1273 
1274   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1275   //       parallel region to make cancellation barriers work properly.
1276   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1277   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1278   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1279                                     HasCancel, OutlinedHelperName);
1280   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1281   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1282 }
1283 
1284 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1285     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1286     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1287   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1288   return emitParallelOrTeamsOutlinedFunction(
1289       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1290 }
1291 
1292 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1293     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1294     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1295   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1296   return emitParallelOrTeamsOutlinedFunction(
1297       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1298 }
1299 
/// Emits the outlined function for a task or taskloop directive.
/// \p ThreadIDVar is the by-value kmp_int32 thread-id parameter,
/// \p PartIDVar tracks the current task part for untied tasks, and
/// \p TaskTVar carries the kmp_task_t pointer. For untied tasks an action is
/// installed that re-enqueues the task via __kmpc_omp_task after each part;
/// the number of generated parts is reported through \p NumberOfParts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Callback run between parts of an untied task: re-schedule the task by
  // calling __kmpc_omp_task with the task descriptor.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // taskloop variants capture their statement under OMPD_taskloop.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only meaningful for untied tasks, which may be split.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1346 
1347 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1348                              const RecordDecl *RD, const CGRecordLayout &RL,
1349                              ArrayRef<llvm::Constant *> Data) {
1350   llvm::StructType *StructTy = RL.getLLVMType();
1351   unsigned PrevIdx = 0;
1352   ConstantInitBuilder CIBuilder(CGM);
1353   const auto *DI = Data.begin();
1354   for (const FieldDecl *FD : RD->fields()) {
1355     unsigned Idx = RL.getLLVMFieldNo(FD);
1356     // Fill the alignment.
1357     for (unsigned I = PrevIdx; I < Idx; ++I)
1358       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1359     PrevIdx = Idx + 1;
1360     Fields.add(*DI);
1361     ++DI;
1362   }
1363 }
1364 
1365 template <class... As>
1366 static llvm::GlobalVariable *
1367 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1368                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1369                    As &&... Args) {
1370   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1371   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1372   ConstantInitBuilder CIBuilder(CGM);
1373   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1374   buildStructValue(Fields, CGM, RD, RL, Data);
1375   return Fields.finishAndCreateGlobal(
1376       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1377       std::forward<As>(Args)...);
1378 }
1379 
1380 template <typename T>
1381 static void
1382 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1383                                          ArrayRef<llvm::Constant *> Data,
1384                                          T &Parent) {
1385   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1386   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1387   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1388   buildStructValue(Fields, CGM, RD, RL, Data);
1389   Fields.finishAndAddTo(Parent);
1390 }
1391 
// Creates a no-op marker instruction (a bitcast of undef, named "svcpt") that
// serves as the insertion point for runtime service calls in CGF.CurFn. With
// \p AtCurrentPoint the marker goes at the builder's current position;
// otherwise it is placed right after the alloca insertion point near the top
// of the function. clearLocThreadIdInsertPt erases the marker again.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  // The marker instruction has no effect; only its position matters.
  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1407 
1408 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1409   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1410   if (Elem.second.ServiceInsertPt) {
1411     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1412     Elem.second.ServiceInsertPt = nullptr;
1413     Ptr->eraseFromParent();
1414   }
1415 }
1416 
1417 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1418                                                   SourceLocation Loc,
1419                                                   SmallString<128> &Buffer) {
1420   llvm::raw_svector_ostream OS(Buffer);
1421   // Build debug location
1422   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1423   OS << ";" << PLoc.getFilename() << ";";
1424   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1425     OS << FD->getQualifiedNameAsString();
1426   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1427   return OS.str();
1428 }
1429 
1430 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1431                                                  SourceLocation Loc,
1432                                                  unsigned Flags) {
1433   uint32_t SrcLocStrSize;
1434   llvm::Constant *SrcLocStr;
1435   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1436       Loc.isInvalid()) {
1437     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1438   } else {
1439     std::string FunctionName;
1440     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1441       FunctionName = FD->getQualifiedNameAsString();
1442     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1443     const char *FileName = PLoc.getFilename();
1444     unsigned Line = PLoc.getLine();
1445     unsigned Column = PLoc.getColumn();
1446     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1447                                                 Column, SrcLocStrSize);
1448   }
1449   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1450   return OMPBuilder.getOrCreateIdent(
1451       SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1452 }
1453 
/// Return the OpenMP thread id for the current function, reusing an
/// outlined-region thread-id argument or a per-function cached value when
/// possible, otherwise emitting a call to __kmpc_global_thread_num at the
/// service insertion point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only reuse the argument when it is safe: either no C++ EH landing
      // pads are in play, or the load happens in (or reads from) the entry
      // block / the current block, so the load cannot be bypassed.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insertion point so the result
  // dominates all later uses; the guard restores the insertion point after.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1522 
1523 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1524   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1525   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1526     clearLocThreadIdInsertPt(CGF);
1527     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1528   }
1529   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1530     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1531       UDRMap.erase(D);
1532     FunctionUDRMap.erase(CGF.CurFn);
1533   }
1534   auto I = FunctionUDMMap.find(CGF.CurFn);
1535   if (I != FunctionUDMMap.end()) {
1536     for(const auto *D : I->second)
1537       UDMMap.erase(D);
1538     FunctionUDMMap.erase(I);
1539   }
1540   LastprivateConditionalToTypes.erase(CGF.CurFn);
1541   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1542 }
1543 
/// Return the pointer-to-ident_t type maintained by the OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1547 
1548 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1549   if (!Kmpc_MicroTy) {
1550     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1551     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1552                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1553     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1554   }
1555   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1556 }
1557 
1558 llvm::FunctionCallee
1559 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1560                                              bool IsGPUDistribute) {
1561   assert((IVSize == 32 || IVSize == 64) &&
1562          "IV size is not compatible with the omp runtime");
1563   StringRef Name;
1564   if (IsGPUDistribute)
1565     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1566                                     : "__kmpc_distribute_static_init_4u")
1567                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1568                                     : "__kmpc_distribute_static_init_8u");
1569   else
1570     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1571                                     : "__kmpc_for_static_init_4u")
1572                         : (IVSigned ? "__kmpc_for_static_init_8"
1573                                     : "__kmpc_for_static_init_8u");
1574 
1575   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1576   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1577   llvm::Type *TypeParams[] = {
1578     getIdentTyPointerTy(),                     // loc
1579     CGM.Int32Ty,                               // tid
1580     CGM.Int32Ty,                               // schedtype
1581     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1582     PtrTy,                                     // p_lower
1583     PtrTy,                                     // p_upper
1584     PtrTy,                                     // p_stride
1585     ITy,                                       // incr
1586     ITy                                        // chunk
1587   };
1588   auto *FnTy =
1589       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1590   return CGM.CreateRuntimeFunction(FnTy, Name);
1591 }
1592 
1593 llvm::FunctionCallee
1594 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1595   assert((IVSize == 32 || IVSize == 64) &&
1596          "IV size is not compatible with the omp runtime");
1597   StringRef Name =
1598       IVSize == 32
1599           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1600           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1601   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1602   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1603                                CGM.Int32Ty,           // tid
1604                                CGM.Int32Ty,           // schedtype
1605                                ITy,                   // lower
1606                                ITy,                   // upper
1607                                ITy,                   // stride
1608                                ITy                    // chunk
1609   };
1610   auto *FnTy =
1611       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1612   return CGM.CreateRuntimeFunction(FnTy, Name);
1613 }
1614 
1615 llvm::FunctionCallee
1616 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1617   assert((IVSize == 32 || IVSize == 64) &&
1618          "IV size is not compatible with the omp runtime");
1619   StringRef Name =
1620       IVSize == 32
1621           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1622           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1623   llvm::Type *TypeParams[] = {
1624       getIdentTyPointerTy(), // loc
1625       CGM.Int32Ty,           // tid
1626   };
1627   auto *FnTy =
1628       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1629   return CGM.CreateRuntimeFunction(FnTy, Name);
1630 }
1631 
1632 llvm::FunctionCallee
1633 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1634   assert((IVSize == 32 || IVSize == 64) &&
1635          "IV size is not compatible with the omp runtime");
1636   StringRef Name =
1637       IVSize == 32
1638           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1639           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1640   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1641   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1642   llvm::Type *TypeParams[] = {
1643     getIdentTyPointerTy(),                     // loc
1644     CGM.Int32Ty,                               // tid
1645     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1646     PtrTy,                                     // p_lower
1647     PtrTy,                                     // p_upper
1648     PtrTy                                      // p_stride
1649   };
1650   auto *FnTy =
1651       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1652   return CGM.CreateRuntimeFunction(FnTy, Name);
1653 }
1654 
1655 /// Obtain information that uniquely identifies a target entry. This
1656 /// consists of the file and device IDs as well as line number associated with
1657 /// the relevant entry source location.
1658 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1659                                      unsigned &DeviceID, unsigned &FileID,
1660                                      unsigned &LineNum) {
1661   SourceManager &SM = C.getSourceManager();
1662 
1663   // The loc should be always valid and have a file ID (the user cannot use
1664   // #pragma directives in macros)
1665 
1666   assert(Loc.isValid() && "Source location is expected to be always valid.");
1667 
1668   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1669   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1670 
1671   llvm::sys::fs::UniqueID ID;
1672   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1673     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1674     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1675     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1676       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1677           << PLoc.getFilename() << EC.message();
1678   }
1679 
1680   DeviceID = ID.getDevice();
1681   FileID = ID.getFile();
1682   LineNum = PLoc.getLine();
1683 }
1684 
/// Return the address of the "_decl_tgt_ref_ptr" reference pointer used to
/// access a declare-target 'link' variable (or a 'to' variable when unified
/// shared memory is required); returns an invalid Address for all other
/// variables.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // Under -fopenmp-simd no offloading is performed, so no indirection is
  // needed.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // The pointer is named "<mangled-name>[_<fileid>]_decl_tgt_ref_ptr"; the
    // file id disambiguates internal-linkage symbols across TUs.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    // Create the pointer global lazily on first request.
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer starts out pointing at the variable itself;
      // on the device it is left for the runtime/registration to fill in.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address::deprecated(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1723 
1724 llvm::Constant *
1725 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1726   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1727          !CGM.getContext().getTargetInfo().isTLSSupported());
1728   // Lookup the entry, lazily creating it if necessary.
1729   std::string Suffix = getName({"cache", ""});
1730   return getOrCreateInternalVariable(
1731       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1732 }
1733 
1734 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1735                                                 const VarDecl *VD,
1736                                                 Address VDAddr,
1737                                                 SourceLocation Loc) {
1738   if (CGM.getLangOpts().OpenMPUseTLS &&
1739       CGM.getContext().getTargetInfo().isTLSSupported())
1740     return VDAddr;
1741 
1742   llvm::Type *VarTy = VDAddr.getElementType();
1743   llvm::Value *Args[] = {
1744       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1745       CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1746       CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1747       getOrCreateThreadPrivateCache(VD)};
1748   return Address::deprecated(
1749       CGF.EmitRuntimeCall(
1750           OMPBuilder.getOrCreateRuntimeFunction(
1751               CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1752           Args),
1753       VDAddr.getAlignment());
1754 }
1755 
1756 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1757     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1758     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1759   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1760   // library.
1761   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1762   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1763                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1764                       OMPLoc);
1765   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1766   // to register constructor/destructor for variable.
1767   llvm::Value *Args[] = {
1768       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1769       Ctor, CopyCtor, Dtor};
1770   CGF.EmitRuntimeCall(
1771       OMPBuilder.getOrCreateRuntimeFunction(
1772           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1773       Args);
1774 }
1775 
/// Emit registration of constructor/destructor functions for a threadprivate
/// variable.
/// \param VD          The threadprivate variable; its definition is used.
/// \param VDAddr      Address of the original (master) copy of the variable.
/// \param Loc         Location to attribute the generated code to.
/// \param PerformInit Whether the declaration's initializer must be emitted.
/// \param CGF         If non-null, the registration call is emitted inline
///                    into this function; otherwise a standalone
///                    "__omp_threadprivate_init_" function is created.
/// \return The standalone init function, or nullptr when none is needed
///         (native TLS, already emitted, or no ctor/dtor required).
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With native TLS the usual static-initialization machinery applies;
  // nothing to register with the OpenMP runtime.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit at most once per variable definition.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The single parameter is the void* address of the thread-local copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address::deprecated(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the same address it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The single parameter is the void* address of the thread-local copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address::deprecated(ArgVal, VDAddr.getAlignment()),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Unused ctor/dtor slots are passed as typed null function pointers.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No active function: build a dedicated global init function that does
      // the registration and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1895 
/// Emit offload-entry constructor/destructor stubs for a declare-target
/// variable definition.
/// \return true when the caller must suppress the regular definition (i.e.
///         when compiling for the device), false otherwise.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do on a pure host compile with no device targets.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables (and 'to' variables under unified shared memory) are
  // accessed through a reference pointer; no ctor/dtor entries are needed.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Each definition is processed at most once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Use an artificial debug location for the function body.
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(
          Init, Address::deprecated(Addr, CGM.getContext().getDeclAlign(VD)),
          Init->getType().getQualifiers(),
          /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Add to llvm.used so the ctor is not discarded before the offload
      // entries referencing it are emitted.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host only a private ID global is needed to pair with the
      // device-side entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(
          Address::deprecated(Addr, CGM.getContext().getDeclAlign(VD)), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Add to llvm.used so the dtor is not discarded before the offload
      // entries referencing it are emitted.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side private ID global paired with the device entry.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2011 
2012 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2013                                                           QualType VarType,
2014                                                           StringRef Name) {
2015   std::string Suffix = getName({"artificial", ""});
2016   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2017   llvm::GlobalVariable *GAddr =
2018       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2019   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
2020       CGM.getTarget().isTLSSupported()) {
2021     GAddr->setThreadLocal(/*Val=*/true);
2022     return Address(GAddr, GAddr->getValueType(),
2023                    CGM.getContext().getTypeAlignInChars(VarType));
2024   }
2025   std::string CacheSuffix = getName({"cache", ""});
2026   llvm::Value *Args[] = {
2027       emitUpdateLocation(CGF, SourceLocation()),
2028       getThreadID(CGF, SourceLocation()),
2029       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2030       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2031                                 /*isSigned=*/false),
2032       getOrCreateInternalVariable(
2033           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2034   return Address(
2035       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2036           CGF.EmitRuntimeCall(
2037               OMPBuilder.getOrCreateRuntimeFunction(
2038                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2039               Args),
2040           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2041       VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
2042 }
2043 
2044 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2045                                    const RegionCodeGenTy &ThenGen,
2046                                    const RegionCodeGenTy &ElseGen) {
2047   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2048 
2049   // If the condition constant folds and can be elided, try to avoid emitting
2050   // the condition and the dead arm of the if/else.
2051   bool CondConstant;
2052   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2053     if (CondConstant)
2054       ThenGen(CGF);
2055     else
2056       ElseGen(CGF);
2057     return;
2058   }
2059 
2060   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2061   // emit the conditional branch.
2062   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2063   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2064   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2065   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2066 
2067   // Emit the 'then' code.
2068   CGF.EmitBlock(ThenBlock);
2069   ThenGen(CGF);
2070   CGF.EmitBranch(ContBlock);
2071   // Emit the 'else' code if present.
2072   // There is no need to emit line number for unconditional branch.
2073   (void)ApplyDebugLocation::CreateEmpty(CGF);
2074   CGF.EmitBlock(ElseBlock);
2075   ElseGen(CGF);
2076   // There is no need to emit line number for unconditional branch.
2077   (void)ApplyDebugLocation::CreateEmpty(CGF);
2078   CGF.EmitBranch(ContBlock);
2079   // Emit the continuation block for code after the if.
2080   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2081 }
2082 
/// Emits a call implementing an OpenMP 'parallel' region.
///
/// Without an if-clause (\p IfCond == nullptr), or when the condition is
/// true, the outlined function is launched via __kmpc_fork_call with the
/// captured variables as trailing arguments. When the condition is false,
/// the region is run by the encountering thread itself, bracketed by
/// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
/// NOTE(review): \p NumThreads is not consumed in this function — presumably
/// the num_threads clause is emitted by the caller; confirm against callers.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Forked (truly parallel) path.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: the encountering thread executes the region directly.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No if-clause: always fork.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2154 
2155 // If we're inside an (outlined) parallel region, use the region info's
2156 // thread-ID variable (it is passed in a first argument of the outlined function
2157 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2158 // regular serial code region, get thread ID by calling kmp_int32
2159 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2160 // return the address of that temp.
2161 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2162                                              SourceLocation Loc) {
2163   if (auto *OMPRegionInfo =
2164           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2165     if (OMPRegionInfo->getThreadIDVariable())
2166       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2167 
2168   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2169   QualType Int32Ty =
2170       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2171   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2172   CGF.EmitStoreOfScalar(ThreadID,
2173                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2174 
2175   return ThreadIDTemp;
2176 }
2177 
2178 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2179     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2180   SmallString<256> Buffer;
2181   llvm::raw_svector_ostream Out(Buffer);
2182   Out << Name;
2183   StringRef RuntimeName = Out.str();
2184   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2185   if (Elem.second) {
2186     assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2187            "OMP internal variable has different type than requested");
2188     return &*Elem.second;
2189   }
2190 
2191   return Elem.second = new llvm::GlobalVariable(
2192              CGM.getModule(), Ty, /*IsConstant*/ false,
2193              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2194              Elem.first(), /*InsertBefore=*/nullptr,
2195              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2196 }
2197 
2198 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2199   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2200   std::string Name = getName({Prefix, "var"});
2201   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2202 }
2203 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Wraps a region body in a pair of runtime calls: \c Enter emits
/// \c EnterCallee before the body and \c Exit emits \c ExitCallee after it.
/// In the conditional form the body (and the exit call) only execute when
/// the enter call returns non-zero; \c Done() must then be called afterwards
/// to emit the continuation block.
class CommonActionTy final : public PrePostActionTy {
  // Runtime function emitted before the region (e.g. __kmpc_master).
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  // Runtime function emitted after the region (e.g. __kmpc_end_master).
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  // When true, the region runs only if EnterCallee returns non-zero.
  bool Conditional;
  // Continuation block; set by Enter() in the conditional form and consumed
  // by Done().
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      // Guard the region body on a non-zero result of the enter call.
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // NOTE(review): only valid after a conditional Enter() — otherwise
  // ContBlock is still null; all callers in this file follow that pattern.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2242 
2243 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2244                                          StringRef CriticalName,
2245                                          const RegionCodeGenTy &CriticalOpGen,
2246                                          SourceLocation Loc, const Expr *Hint) {
2247   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2248   // CriticalOpGen();
2249   // __kmpc_end_critical(ident_t *, gtid, Lock);
2250   // Prepare arguments and build a call to __kmpc_critical
2251   if (!CGF.HaveInsertPoint())
2252     return;
2253   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2254                          getCriticalRegionLock(CriticalName)};
2255   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2256                                                 std::end(Args));
2257   if (Hint) {
2258     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2259         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2260   }
2261   CommonActionTy Action(
2262       OMPBuilder.getOrCreateRuntimeFunction(
2263           CGM.getModule(),
2264           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2265       EnterArgs,
2266       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2267                                             OMPRTL___kmpc_end_critical),
2268       Args);
2269   CriticalOpGen.setAction(Action);
2270   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2271 }
2272 
2273 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2274                                        const RegionCodeGenTy &MasterOpGen,
2275                                        SourceLocation Loc) {
2276   if (!CGF.HaveInsertPoint())
2277     return;
2278   // if(__kmpc_master(ident_t *, gtid)) {
2279   //   MasterOpGen();
2280   //   __kmpc_end_master(ident_t *, gtid);
2281   // }
2282   // Prepare arguments and build a call to __kmpc_master
2283   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2284   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2285                             CGM.getModule(), OMPRTL___kmpc_master),
2286                         Args,
2287                         OMPBuilder.getOrCreateRuntimeFunction(
2288                             CGM.getModule(), OMPRTL___kmpc_end_master),
2289                         Args,
2290                         /*Conditional=*/true);
2291   MasterOpGen.setAction(Action);
2292   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2293   Action.Done(CGF);
2294 }
2295 
2296 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2297                                        const RegionCodeGenTy &MaskedOpGen,
2298                                        SourceLocation Loc, const Expr *Filter) {
2299   if (!CGF.HaveInsertPoint())
2300     return;
2301   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2302   //   MaskedOpGen();
2303   //   __kmpc_end_masked(iden_t *, gtid);
2304   // }
2305   // Prepare arguments and build a call to __kmpc_masked
2306   llvm::Value *FilterVal = Filter
2307                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2308                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2309   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2310                          FilterVal};
2311   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2312                             getThreadID(CGF, Loc)};
2313   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2314                             CGM.getModule(), OMPRTL___kmpc_masked),
2315                         Args,
2316                         OMPBuilder.getOrCreateRuntimeFunction(
2317                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2318                         ArgsEnd,
2319                         /*Conditional=*/true);
2320   MaskedOpGen.setAction(Action);
2321   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2322   Action.Done(CGF);
2323 }
2324 
2325 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2326                                         SourceLocation Loc) {
2327   if (!CGF.HaveInsertPoint())
2328     return;
2329   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2330     OMPBuilder.createTaskyield(CGF.Builder);
2331   } else {
2332     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2333     llvm::Value *Args[] = {
2334         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2335         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2336     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2337                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2338                         Args);
2339   }
2340 
2341   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2342     Region->emitUntiedSwitch(CGF);
2343 }
2344 
2345 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2346                                           const RegionCodeGenTy &TaskgroupOpGen,
2347                                           SourceLocation Loc) {
2348   if (!CGF.HaveInsertPoint())
2349     return;
2350   // __kmpc_taskgroup(ident_t *, gtid);
2351   // TaskgroupOpGen();
2352   // __kmpc_end_taskgroup(ident_t *, gtid);
2353   // Prepare arguments and build a call to __kmpc_taskgroup
2354   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2355   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2356                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2357                         Args,
2358                         OMPBuilder.getOrCreateRuntimeFunction(
2359                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2360                         Args);
2361   TaskgroupOpGen.setAction(Action);
2362   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2363 }
2364 
2365 /// Given an array of pointers to variables, project the address of a
2366 /// given variable.
2367 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2368                                       unsigned Index, const VarDecl *Var) {
2369   // Pull out the pointer to the variable.
2370   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2371   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2372 
2373   Address Addr = Address::deprecated(Ptr, CGF.getContext().getDeclAlign(Var));
2374   Addr = CGF.Builder.CreateElementBitCast(
2375       Addr, CGF.ConvertTypeForMem(Var->getType()));
2376   return Addr;
2377 }
2378 
/// Emits the helper function that __kmpc_copyprivate uses to broadcast
/// copyprivate values from the thread that executed the 'single' region to
/// the other threads.
///
/// The generated function has the shape
///   void copy_func(void *LHSArg, void *RHSArg);
/// where each argument is really an \p ArgsType — an array of n pointers to
/// the destination/source variables — and element I is copied using
/// \p AssignmentOps[I].
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  // The helper is TU-local and marked norecurse.
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS = Address::deprecated(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType),
      CGF.getPointerAlign());
  Address RHS = Address::deprecated(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType),
      CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2434 
/// Emits an OpenMP 'single' region and, when copyprivate variables are
/// present, broadcasts their values from the executing thread to the other
/// threads via __kmpc_copyprivate and a generated copy helper.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // did_it is only needed when there are copyprivate vars to broadcast.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the conditional block — only the executing
    // thread sets the flag)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2521 
2522 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2523                                         const RegionCodeGenTy &OrderedOpGen,
2524                                         SourceLocation Loc, bool IsThreads) {
2525   if (!CGF.HaveInsertPoint())
2526     return;
2527   // __kmpc_ordered(ident_t *, gtid);
2528   // OrderedOpGen();
2529   // __kmpc_end_ordered(ident_t *, gtid);
2530   // Prepare arguments and build a call to __kmpc_ordered
2531   if (IsThreads) {
2532     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2533     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2534                               CGM.getModule(), OMPRTL___kmpc_ordered),
2535                           Args,
2536                           OMPBuilder.getOrCreateRuntimeFunction(
2537                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2538                           Args);
2539     OrderedOpGen.setAction(Action);
2540     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2541     return;
2542   }
2543   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2544 }
2545 
2546 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2547   unsigned Flags;
2548   if (Kind == OMPD_for)
2549     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2550   else if (Kind == OMPD_sections)
2551     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2552   else if (Kind == OMPD_single)
2553     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2554   else if (Kind == OMPD_barrier)
2555     Flags = OMP_IDENT_BARRIER_EXPL;
2556   else
2557     Flags = OMP_IDENT_BARRIER_IMPL;
2558   return Flags;
2559 }
2560 
2561 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2562     CodeGenFunction &CGF, const OMPLoopDirective &S,
2563     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2564   // Check if the loop directive is actually a doacross loop directive. In this
2565   // case choose static, 1 schedule.
2566   if (llvm::any_of(
2567           S.getClausesOfKind<OMPOrderedClause>(),
2568           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2569     ScheduleKind = OMPC_SCHEDULE_static;
2570     // Chunk size is 1 in this case.
2571     llvm::APInt ChunkSize(32, 1);
2572     ChunkExpr = IntegerLiteral::Create(
2573         CGF.getContext(), ChunkSize,
2574         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2575         SourceLocation());
2576   }
2577 }
2578 
/// Emits a barrier for the construct \p Kind.
///
/// With the OpenMPIRBuilder enabled the barrier is delegated to
/// OMPBuilder.createBarrier. Otherwise __kmpc_barrier is emitted, or —
/// inside a cancellable region, unless \p ForceSimpleCall —
/// __kmpc_cancel_barrier, whose non-zero result (when \p EmitChecks)
/// branches to the construct's cancellation destination.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Plain (non-cancellable) barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2628 
2629 /// Map the OpenMP loop schedule to the runtime enumeration.
2630 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2631                                           bool Chunked, bool Ordered) {
2632   switch (ScheduleKind) {
2633   case OMPC_SCHEDULE_static:
2634     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2635                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2636   case OMPC_SCHEDULE_dynamic:
2637     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2638   case OMPC_SCHEDULE_guided:
2639     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2640   case OMPC_SCHEDULE_runtime:
2641     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2642   case OMPC_SCHEDULE_auto:
2643     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2644   case OMPC_SCHEDULE_unknown:
2645     assert(!Chunked && "chunk was specified but schedule kind not known");
2646     return Ordered ? OMP_ord_static : OMP_sch_static;
2647   }
2648   llvm_unreachable("Unexpected runtime schedule");
2649 }
2650 
2651 /// Map the OpenMP distribute schedule to the runtime enumeration.
2652 static OpenMPSchedType
2653 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2654   // only static is allowed for dist_schedule
2655   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2656 }
2657 
2658 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2659                                          bool Chunked) const {
2660   OpenMPSchedType Schedule =
2661       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2662   return Schedule == OMP_sch_static;
2663 }
2664 
2665 bool CGOpenMPRuntime::isStaticNonchunked(
2666     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2667   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2668   return Schedule == OMP_dist_sch_static;
2669 }
2670 
2671 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2672                                       bool Chunked) const {
2673   OpenMPSchedType Schedule =
2674       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2675   return Schedule == OMP_sch_static_chunked;
2676 }
2677 
2678 bool CGOpenMPRuntime::isStaticChunked(
2679     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2680   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2681   return Schedule == OMP_dist_sch_static_chunked;
2682 }
2683 
2684 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2685   OpenMPSchedType Schedule =
2686       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2687   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2688   return Schedule != OMP_sch_static;
2689 }
2690 
2691 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2692                                   OpenMPScheduleClauseModifier M1,
2693                                   OpenMPScheduleClauseModifier M2) {
2694   int Modifier = 0;
2695   switch (M1) {
2696   case OMPC_SCHEDULE_MODIFIER_monotonic:
2697     Modifier = OMP_sch_modifier_monotonic;
2698     break;
2699   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2700     Modifier = OMP_sch_modifier_nonmonotonic;
2701     break;
2702   case OMPC_SCHEDULE_MODIFIER_simd:
2703     if (Schedule == OMP_sch_static_chunked)
2704       Schedule = OMP_sch_static_balanced_chunked;
2705     break;
2706   case OMPC_SCHEDULE_MODIFIER_last:
2707   case OMPC_SCHEDULE_MODIFIER_unknown:
2708     break;
2709   }
2710   switch (M2) {
2711   case OMPC_SCHEDULE_MODIFIER_monotonic:
2712     Modifier = OMP_sch_modifier_monotonic;
2713     break;
2714   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2715     Modifier = OMP_sch_modifier_nonmonotonic;
2716     break;
2717   case OMPC_SCHEDULE_MODIFIER_simd:
2718     if (Schedule == OMP_sch_static_chunked)
2719       Schedule = OMP_sch_static_balanced_chunked;
2720     break;
2721   case OMPC_SCHEDULE_MODIFIER_last:
2722   case OMPC_SCHEDULE_MODIFIER_unknown:
2723     break;
2724   }
2725   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2726   // If the static schedule kind is specified or if the ordered clause is
2727   // specified, and if the nonmonotonic modifier is not specified, the effect is
2728   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2729   // modifier is specified, the effect is as if the nonmonotonic modifier is
2730   // specified.
2731   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2732     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2733           Schedule == OMP_sch_static_balanced_chunked ||
2734           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2735           Schedule == OMP_dist_sch_static_chunked ||
2736           Schedule == OMP_dist_sch_static))
2737       Modifier = OMP_sch_modifier_nonmonotonic;
2738   }
2739   return Schedule | Modifier;
2740 }
2741 
2742 void CGOpenMPRuntime::emitForDispatchInit(
2743     CodeGenFunction &CGF, SourceLocation Loc,
2744     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2745     bool Ordered, const DispatchRTInput &DispatchValues) {
2746   if (!CGF.HaveInsertPoint())
2747     return;
2748   OpenMPSchedType Schedule = getRuntimeSchedule(
2749       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2750   assert(Ordered ||
2751          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2752           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2753           Schedule != OMP_sch_static_balanced_chunked));
2754   // Call __kmpc_dispatch_init(
2755   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2756   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2757   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2758 
2759   // If the Chunk was not specified in the clause - use default value 1.
2760   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2761                                             : CGF.Builder.getIntN(IVSize, 1);
2762   llvm::Value *Args[] = {
2763       emitUpdateLocation(CGF, Loc),
2764       getThreadID(CGF, Loc),
2765       CGF.Builder.getInt32(addMonoNonMonoModifier(
2766           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2767       DispatchValues.LB,                                     // Lower
2768       DispatchValues.UB,                                     // Upper
2769       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2770       Chunk                                                  // Chunk
2771   };
2772   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2773 }
2774 
2775 static void emitForStaticInitCall(
2776     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2777     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2778     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2779     const CGOpenMPRuntime::StaticRTInput &Values) {
2780   if (!CGF.HaveInsertPoint())
2781     return;
2782 
2783   assert(!Values.Ordered);
2784   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2785          Schedule == OMP_sch_static_balanced_chunked ||
2786          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2787          Schedule == OMP_dist_sch_static ||
2788          Schedule == OMP_dist_sch_static_chunked);
2789 
2790   // Call __kmpc_for_static_init(
2791   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2792   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2793   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2794   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2795   llvm::Value *Chunk = Values.Chunk;
2796   if (Chunk == nullptr) {
2797     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2798             Schedule == OMP_dist_sch_static) &&
2799            "expected static non-chunked schedule");
2800     // If the Chunk was not specified in the clause - use default value 1.
2801     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2802   } else {
2803     assert((Schedule == OMP_sch_static_chunked ||
2804             Schedule == OMP_sch_static_balanced_chunked ||
2805             Schedule == OMP_ord_static_chunked ||
2806             Schedule == OMP_dist_sch_static_chunked) &&
2807            "expected static chunked schedule");
2808   }
2809   llvm::Value *Args[] = {
2810       UpdateLocation,
2811       ThreadId,
2812       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2813                                                   M2)), // Schedule type
2814       Values.IL.getPointer(),                           // &isLastIter
2815       Values.LB.getPointer(),                           // &LB
2816       Values.UB.getPointer(),                           // &UB
2817       Values.ST.getPointer(),                           // &Stride
2818       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2819       Chunk                                             // Chunk
2820   };
2821   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2822 }
2823 
2824 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2825                                         SourceLocation Loc,
2826                                         OpenMPDirectiveKind DKind,
2827                                         const OpenMPScheduleTy &ScheduleKind,
2828                                         const StaticRTInput &Values) {
2829   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2830       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2831   assert(isOpenMPWorksharingDirective(DKind) &&
2832          "Expected loop-based or sections-based directive.");
2833   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2834                                              isOpenMPLoopDirective(DKind)
2835                                                  ? OMP_IDENT_WORK_LOOP
2836                                                  : OMP_IDENT_WORK_SECTIONS);
2837   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2838   llvm::FunctionCallee StaticInitFunction =
2839       createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2840   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2841   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2842                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2843 }
2844 
2845 void CGOpenMPRuntime::emitDistributeStaticInit(
2846     CodeGenFunction &CGF, SourceLocation Loc,
2847     OpenMPDistScheduleClauseKind SchedKind,
2848     const CGOpenMPRuntime::StaticRTInput &Values) {
2849   OpenMPSchedType ScheduleNum =
2850       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2851   llvm::Value *UpdatedLocation =
2852       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2853   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2854   llvm::FunctionCallee StaticInitFunction;
2855   bool isGPUDistribute =
2856       CGM.getLangOpts().OpenMPIsDevice &&
2857       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2858   StaticInitFunction = createForStaticInitFunction(
2859       Values.IVSize, Values.IVSigned, isGPUDistribute);
2860 
2861   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2862                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2863                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2864 }
2865 
2866 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2867                                           SourceLocation Loc,
2868                                           OpenMPDirectiveKind DKind) {
2869   if (!CGF.HaveInsertPoint())
2870     return;
2871   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2872   llvm::Value *Args[] = {
2873       emitUpdateLocation(CGF, Loc,
2874                          isOpenMPDistributeDirective(DKind)
2875                              ? OMP_IDENT_WORK_DISTRIBUTE
2876                              : isOpenMPLoopDirective(DKind)
2877                                    ? OMP_IDENT_WORK_LOOP
2878                                    : OMP_IDENT_WORK_SECTIONS),
2879       getThreadID(CGF, Loc)};
2880   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2881   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2882       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2883     CGF.EmitRuntimeCall(
2884         OMPBuilder.getOrCreateRuntimeFunction(
2885             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2886         Args);
2887   else
2888     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2889                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2890                         Args);
2891 }
2892 
2893 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2894                                                  SourceLocation Loc,
2895                                                  unsigned IVSize,
2896                                                  bool IVSigned) {
2897   if (!CGF.HaveInsertPoint())
2898     return;
2899   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2900   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2901   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2902 }
2903 
2904 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2905                                           SourceLocation Loc, unsigned IVSize,
2906                                           bool IVSigned, Address IL,
2907                                           Address LB, Address UB,
2908                                           Address ST) {
2909   // Call __kmpc_dispatch_next(
2910   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2911   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2912   //          kmp_int[32|64] *p_stride);
2913   llvm::Value *Args[] = {
2914       emitUpdateLocation(CGF, Loc),
2915       getThreadID(CGF, Loc),
2916       IL.getPointer(), // &isLastIter
2917       LB.getPointer(), // &Lower
2918       UB.getPointer(), // &Upper
2919       ST.getPointer()  // &Stride
2920   };
2921   llvm::Value *Call =
2922       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2923   return CGF.EmitScalarConversion(
2924       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2925       CGF.getContext().BoolTy, Loc);
2926 }
2927 
2928 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2929                                            llvm::Value *NumThreads,
2930                                            SourceLocation Loc) {
2931   if (!CGF.HaveInsertPoint())
2932     return;
2933   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2934   llvm::Value *Args[] = {
2935       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2936       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2937   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2938                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2939                       Args);
2940 }
2941 
2942 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2943                                          ProcBindKind ProcBind,
2944                                          SourceLocation Loc) {
2945   if (!CGF.HaveInsertPoint())
2946     return;
2947   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2948   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2949   llvm::Value *Args[] = {
2950       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2951       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2952   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2953                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2954                       Args);
2955 }
2956 
2957 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2958                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2959   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2960     OMPBuilder.createFlush(CGF.Builder);
2961   } else {
2962     if (!CGF.HaveInsertPoint())
2963       return;
2964     // Build call void __kmpc_flush(ident_t *loc)
2965     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2966                             CGM.getModule(), OMPRTL___kmpc_flush),
2967                         emitUpdateLocation(CGF, Loc));
2968   }
2969 }
2970 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): the enumerator values appear to be used as field indexes, so
/// the order must stay in sync with the runtime's kmp_task_t layout — do not
/// reorder; confirm against the record-building code before changing.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2996 
2997 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2998   return OffloadEntriesTargetRegion.empty() &&
2999          OffloadEntriesDeviceGlobalVar.empty();
3000 }
3001 
3002 /// Initialize target region entry.
3003 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3004     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3005                                     StringRef ParentName, unsigned LineNum,
3006                                     unsigned Order) {
3007   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3008                                              "only required for the device "
3009                                              "code generation.");
3010   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3011       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3012                                    OMPTargetRegionEntryTargetRegion);
3013   ++OffloadingEntriesNum;
3014 }
3015 
3016 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3017     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3018                                   StringRef ParentName, unsigned LineNum,
3019                                   llvm::Constant *Addr, llvm::Constant *ID,
3020                                   OMPTargetRegionEntryKind Flags) {
3021   // If we are emitting code for a target, the entry is already initialized,
3022   // only has to be registered.
3023   if (CGM.getLangOpts().OpenMPIsDevice) {
3024     // This could happen if the device compilation is invoked standalone.
3025     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
3026       return;
3027     auto &Entry =
3028         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3029     Entry.setAddress(Addr);
3030     Entry.setID(ID);
3031     Entry.setFlags(Flags);
3032   } else {
3033     if (Flags ==
3034             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3035         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3036                                  /*IgnoreAddressId*/ true))
3037       return;
3038     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3039            "Target region entry already registered!");
3040     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3041     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3042     ++OffloadingEntriesNum;
3043   }
3044 }
3045 
3046 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3047     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3048     bool IgnoreAddressId) const {
3049   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3050   if (PerDevice == OffloadEntriesTargetRegion.end())
3051     return false;
3052   auto PerFile = PerDevice->second.find(FileID);
3053   if (PerFile == PerDevice->second.end())
3054     return false;
3055   auto PerParentName = PerFile->second.find(ParentName);
3056   if (PerParentName == PerFile->second.end())
3057     return false;
3058   auto PerLine = PerParentName->second.find(LineNum);
3059   if (PerLine == PerParentName->second.end())
3060     return false;
3061   // Fail if this entry is already registered.
3062   if (!IgnoreAddressId &&
3063       (PerLine->second.getAddress() || PerLine->second.getID()))
3064     return false;
3065   return true;
3066 }
3067 
3068 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3069     const OffloadTargetRegionEntryInfoActTy &Action) {
3070   // Scan all target region entries and perform the provided action.
3071   for (const auto &D : OffloadEntriesTargetRegion)
3072     for (const auto &F : D.second)
3073       for (const auto &P : F.second)
3074         for (const auto &L : P.second)
3075           Action(D.first, F.first, P.first(), L.first, L.second);
3076 }
3077 
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  // Pre-create a device global variable entry (called while loading the host
  // IR metadata); address/size/linkage are filled in later by
  // registerDeviceGlobalVarEntryInfo.
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3088 
3089 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3090     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3091                                      CharUnits VarSize,
3092                                      OMPTargetGlobalVarEntryKind Flags,
3093                                      llvm::GlobalValue::LinkageTypes Linkage) {
3094   if (CGM.getLangOpts().OpenMPIsDevice) {
3095     // This could happen if the device compilation is invoked standalone.
3096     if (!hasDeviceGlobalVarEntryInfo(VarName))
3097       return;
3098     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3099     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3100       if (Entry.getVarSize().isZero()) {
3101         Entry.setVarSize(VarSize);
3102         Entry.setLinkage(Linkage);
3103       }
3104       return;
3105     }
3106     Entry.setVarSize(VarSize);
3107     Entry.setLinkage(Linkage);
3108     Entry.setAddress(Addr);
3109   } else {
3110     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3111       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3112       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3113              "Entry not initialized!");
3114       if (Entry.getVarSize().isZero()) {
3115         Entry.setVarSize(VarSize);
3116         Entry.setLinkage(Linkage);
3117       }
3118       return;
3119     }
3120     OffloadEntriesDeviceGlobalVar.try_emplace(
3121         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3122     ++OffloadingEntriesNum;
3123   }
3124 }
3125 
3126 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3127     actOnDeviceGlobalVarEntriesInfo(
3128         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3129   // Scan all target region entries and perform the provided action.
3130   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3131     Action(E.getKey(), E.getValue());
3132 }
3133 
3134 void CGOpenMPRuntime::createOffloadEntry(
3135     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3136     llvm::GlobalValue::LinkageTypes Linkage) {
3137   StringRef Name = Addr->getName();
3138   llvm::Module &M = CGM.getModule();
3139   llvm::LLVMContext &C = M.getContext();
3140 
3141   // Create constant string with the name.
3142   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3143 
3144   std::string StringName = getName({"omp_offloading", "entry_name"});
3145   auto *Str = new llvm::GlobalVariable(
3146       M, StrPtrInit->getType(), /*isConstant=*/true,
3147       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3148   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3149 
3150   llvm::Constant *Data[] = {
3151       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3152       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3153       llvm::ConstantInt::get(CGM.SizeTy, Size),
3154       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3155       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3156   std::string EntryName = getName({"omp_offloading", "entry", ""});
3157   llvm::GlobalVariable *Entry = createGlobalStruct(
3158       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3159       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3160 
3161   // The entry has to be created in the section the linker expects it to be.
3162   Entry->setSection("omp_offloading_entries");
3163 }
3164 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are stored by their registration order so that output is
  // deterministic; each slot holds the entry, its source location (for
  // diagnostics) and its parent function / mangled name.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover the entry's source location for diagnostics by matching the
        // (device, file) unique ID against the source manager's known files.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit the __tgt_offload_entry structures in registration order and
  // diagnose entries with a missing address/ID.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3338 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway context; only the named metadata is
  // needed from it.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to decode the operands of a single metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 encodes the entry kind; the remaining operand layout must
    // match the emitters in createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3407 
3408 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3409   if (!KmpRoutineEntryPtrTy) {
3410     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3411     ASTContext &C = CGM.getContext();
3412     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3413     FunctionProtoType::ExtProtoInfo EPI;
3414     KmpRoutineEntryPtrQTy = C.getPointerType(
3415         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3416     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3417   }
3418 }
3419 
3420 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3421   // Make sure the type of the entry is already created. This is the type we
3422   // have to create:
3423   // struct __tgt_offload_entry{
3424   //   void      *addr;       // Pointer to the offload entry info.
3425   //                          // (function or global)
3426   //   char      *name;       // Name of the function or global.
3427   //   size_t     size;       // Size of the entry info (0 if it a function).
3428   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3429   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3430   // };
3431   if (TgtOffloadEntryQTy.isNull()) {
3432     ASTContext &C = CGM.getContext();
3433     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3434     RD->startDefinition();
3435     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3436     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3437     addFieldToRecordDecl(C, RD, C.getSizeType());
3438     addFieldToRecordDecl(
3439         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3440     addFieldToRecordDecl(
3441         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3442     RD->completeDefinition();
3443     RD->addAttr(PackedAttr::CreateImplicit(C));
3444     TgtOffloadEntryQTy = C.getRecordType(RD);
3445   }
3446   return TgtOffloadEntryQTy;
3447 }
3448 
3449 namespace {
3450 struct PrivateHelpersTy {
3451   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3452                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3453       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3454         PrivateElemInit(PrivateElemInit) {}
3455   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3456   const Expr *OriginalRef = nullptr;
3457   const VarDecl *Original = nullptr;
3458   const VarDecl *PrivateCopy = nullptr;
3459   const VarDecl *PrivateElemInit = nullptr;
3460   bool isLocalPrivate() const {
3461     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3462   }
3463 };
3464 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3465 } // anonymous namespace
3466 
3467 static bool isAllocatableDecl(const VarDecl *VD) {
3468   const VarDecl *CVD = VD->getCanonicalDecl();
3469   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3470     return false;
3471   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3472   // Use the default allocation.
3473   return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3474            !AA->getAllocator());
3475 }
3476 
3477 static RecordDecl *
3478 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3479   if (!Privates.empty()) {
3480     ASTContext &C = CGM.getContext();
3481     // Build struct .kmp_privates_t. {
3482     //         /*  private vars  */
3483     //       };
3484     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3485     RD->startDefinition();
3486     for (const auto &Pair : Privates) {
3487       const VarDecl *VD = Pair.second.Original;
3488       QualType Type = VD->getType().getNonReferenceType();
3489       // If the private variable is a local variable with lvalue ref type,
3490       // allocate the pointer instead of the pointee type.
3491       if (Pair.second.isLocalPrivate()) {
3492         if (VD->getType()->isLValueReferenceType())
3493           Type = C.getPointerType(Type);
3494         if (isAllocatableDecl(VD))
3495           Type = C.getPointerType(Type);
3496       }
3497       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3498       if (VD->hasAttrs()) {
3499         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3500              E(VD->getAttrs().end());
3501              I != E; ++I)
3502           FD->addAttr(*I);
3503       }
3504     }
3505     RD->completeDefinition();
3506     return RD;
3507   }
3508   return nullptr;
3509 }
3510 
3511 static RecordDecl *
3512 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3513                          QualType KmpInt32Ty,
3514                          QualType KmpRoutineEntryPointerQTy) {
3515   ASTContext &C = CGM.getContext();
3516   // Build struct kmp_task_t {
3517   //         void *              shareds;
3518   //         kmp_routine_entry_t routine;
3519   //         kmp_int32           part_id;
3520   //         kmp_cmplrdata_t data1;
3521   //         kmp_cmplrdata_t data2;
3522   // For taskloops additional fields:
3523   //         kmp_uint64          lb;
3524   //         kmp_uint64          ub;
3525   //         kmp_int64           st;
3526   //         kmp_int32           liter;
3527   //         void *              reductions;
3528   //       };
3529   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3530   UD->startDefinition();
3531   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3532   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3533   UD->completeDefinition();
3534   QualType KmpCmplrdataTy = C.getRecordType(UD);
3535   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3536   RD->startDefinition();
3537   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3538   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3539   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3540   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3541   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3542   if (isOpenMPTaskLoopDirective(Kind)) {
3543     QualType KmpUInt64Ty =
3544         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3545     QualType KmpInt64Ty =
3546         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3547     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3548     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3549     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3550     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3551     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3552   }
3553   RD->completeDefinition();
3554   return RD;
3555 }
3556 
3557 static RecordDecl *
3558 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3559                                      ArrayRef<PrivateDataTy> Privates) {
3560   ASTContext &C = CGM.getContext();
3561   // Build struct kmp_task_t_with_privates {
3562   //         kmp_task_t task_data;
3563   //         .kmp_privates_t. privates;
3564   //       };
3565   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3566   RD->startDefinition();
3567   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3568   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3569     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3570   RD->completeDefinition();
3571   return RD;
3572 }
3573 
3574 /// Emit a proxy function which accepts kmp_task_t as the second
3575 /// argument.
3576 /// \code
3577 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3578 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3579 ///   For taskloops:
3580 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3581 ///   tt->reductions, tt->shareds);
3582 ///   return 0;
3583 /// }
3584 /// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Arguments of the generated proxy: (kmp_int32 gtid, kmp_task_t *tt).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  // Internal linkage: the proxy is only referenced through the task
  // descriptor handed to the runtime.
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // The first field of the wrapper record is the kmp_task_t task_data.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  // part_id is passed by address, not by value.
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record, when present, is the second field of the wrapper;
  // otherwise a null pointer is forwarded.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloop entries additionally forward lb, ub, st, liter and the
    // reductions pointer, all loaded from the kmp_task_t descriptor.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The runtime expects the entry to return 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3688 
/// Emit a function with task-entry signature (kmp_int32 gtid, kmp_task_t *tt)
/// that runs the destructor of every non-trivially destructible field in the
/// task's privates record (the second field of the wrapper type).
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Skip the kmp_task_t field; the privates record is the next field.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for each private copy that needs destruction.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3737 
3738 /// Emit a privates mapping function for correct handling of private and
3739 /// firstprivate variables.
3740 /// \code
3741 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3742 /// **noalias priv1,...,  <tyn> **noalias privn) {
3743 ///   *priv1 = &.privates.priv1;
3744 ///   ...;
3745 ///   *privn = &.privates.privn;
3746 /// }
3747 /// \endcode
3748 static llvm::Value *
3749 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3750                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3751                                ArrayRef<PrivateDataTy> Privates) {
3752   ASTContext &C = CGM.getContext();
3753   FunctionArgList Args;
3754   ImplicitParamDecl TaskPrivatesArg(
3755       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3756       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3757       ImplicitParamDecl::Other);
3758   Args.push_back(&TaskPrivatesArg);
3759   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3760   unsigned Counter = 1;
3761   for (const Expr *E : Data.PrivateVars) {
3762     Args.push_back(ImplicitParamDecl::Create(
3763         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3764         C.getPointerType(C.getPointerType(E->getType()))
3765             .withConst()
3766             .withRestrict(),
3767         ImplicitParamDecl::Other));
3768     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3769     PrivateVarsPos[VD] = Counter;
3770     ++Counter;
3771   }
3772   for (const Expr *E : Data.FirstprivateVars) {
3773     Args.push_back(ImplicitParamDecl::Create(
3774         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3775         C.getPointerType(C.getPointerType(E->getType()))
3776             .withConst()
3777             .withRestrict(),
3778         ImplicitParamDecl::Other));
3779     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3780     PrivateVarsPos[VD] = Counter;
3781     ++Counter;
3782   }
3783   for (const Expr *E : Data.LastprivateVars) {
3784     Args.push_back(ImplicitParamDecl::Create(
3785         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3786         C.getPointerType(C.getPointerType(E->getType()))
3787             .withConst()
3788             .withRestrict(),
3789         ImplicitParamDecl::Other));
3790     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3791     PrivateVarsPos[VD] = Counter;
3792     ++Counter;
3793   }
3794   for (const VarDecl *VD : Data.PrivateLocals) {
3795     QualType Ty = VD->getType().getNonReferenceType();
3796     if (VD->getType()->isLValueReferenceType())
3797       Ty = C.getPointerType(Ty);
3798     if (isAllocatableDecl(VD))
3799       Ty = C.getPointerType(Ty);
3800     Args.push_back(ImplicitParamDecl::Create(
3801         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3802         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3803         ImplicitParamDecl::Other));
3804     PrivateVarsPos[VD] = Counter;
3805     ++Counter;
3806   }
3807   const auto &TaskPrivatesMapFnInfo =
3808       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3809   llvm::FunctionType *TaskPrivatesMapTy =
3810       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3811   std::string Name =
3812       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3813   auto *TaskPrivatesMap = llvm::Function::Create(
3814       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3815       &CGM.getModule());
3816   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3817                                     TaskPrivatesMapFnInfo);
3818   if (CGM.getLangOpts().Optimize) {
3819     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3820     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3821     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3822   }
3823   CodeGenFunction CGF(CGM);
3824   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3825                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3826 
3827   // *privi = &.privates.privi;
3828   LValue Base = CGF.EmitLoadOfPointerLValue(
3829       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3830       TaskPrivatesArg.getType()->castAs<PointerType>());
3831   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3832   Counter = 0;
3833   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3834     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3835     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3836     LValue RefLVal =
3837         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3838     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3839         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3840     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3841     ++Counter;
3842   }
3843   CGF.FinishFunction();
3844   return TaskPrivatesMap;
3845 }
3846 
3847 /// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of the wrapper type.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  // Walk the privates record fields in lock-step with the Privates list.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the task_dup path (ForDup) only non-trivial constructor inits are
    // re-emitted; trivial data was already copied by the runtime.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the shared copy from the source task's shareds block,
          // realigned to the original variable's declared alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda/block captures can be emitted directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: privatize the element-init variable
          // to the shared address, then emit the initializer.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: just emit the declared initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3969 
3970 /// Check if duplication function is required for taskloops.
3971 static bool checkInitIsRequired(CodeGenFunction &CGF,
3972                                 ArrayRef<PrivateDataTy> Privates) {
3973   bool InitRequired = false;
3974   for (const PrivateDataTy &Pair : Privates) {
3975     if (Pair.second.isLocalPrivate())
3976       continue;
3977     const VarDecl *VD = Pair.second.PrivateCopy;
3978     const Expr *Init = VD->getAnyInitializer();
3979     InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3980                                     !CGF.isTrivialInitializer(Init));
3981     if (InitRequired)
3982       break;
3983   }
3984   return InitRequired;
3985 }
3986 
3987 
3988 /// Emit task_dup function (for initialization of
3989 /// private/firstprivate/lastprivate vars and last_iter flag)
3990 /// \code
3991 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3992 /// lastpriv) {
3993 /// // setup lastprivate flag
3994 ///    task_dst->last = lastpriv;
3995 /// // could be constructor calls here...
3996 /// }
3997 /// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Parameters: (kmp_task_t *task_dst, const kmp_task_t *task_src,
  //              int lastpriv).
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Load the shareds pointer out of the *source* task so firstprivate
    // copies are duplicated from the original values.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address::deprecated(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4066 
4067 /// Checks if destructor function is required to be generated.
4068 /// \return true if cleanups are required, false otherwise.
4069 static bool
4070 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4071                          ArrayRef<PrivateDataTy> Privates) {
4072   for (const PrivateDataTy &P : Privates) {
4073     if (P.second.isLocalPrivate())
4074       continue;
4075     QualType Ty = P.second.Original->getType().getNonReferenceType();
4076     if (Ty.isDestructedType())
4077       return true;
4078   }
4079   return false;
4080 }
4081 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII scope that, on construction, privatizes each iterator variable (and
/// its helper counter) and opens one counted loop per iterator; on
/// destruction, it emits the counter increments, back-edges and exit blocks
/// in reverse order, closing the loop nest.  Code emitted between
/// construction and destruction therefore runs once per point of the
/// iterator space.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  // Iterator expression driving the loop nest; may be null, in which case
  // this scope is a no-op.
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continuation ("cont") and exit destinations, indexed in the
  // same order as the iterators; consumed in reverse by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Privatizes the iterator/counter variables and emits the loop headers.
  /// \param E Iterator expression, or null for a trivial (no-loop) scope.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    // Emit all upper bounds up front, before the loop nest is opened, so
    // their side effects happen exactly once.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Open one loop per iterator, outermost first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Choose signed/unsigned compare based on the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loop nest: increments each counter, branches back to the
  /// corresponding "cont" block and emits the exit block, innermost first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      // Only the outermost exit (I == 1) finishes the current block chain.
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4160 
4161 static std::pair<llvm::Value *, llvm::Value *>
4162 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4163   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4164   llvm::Value *Addr;
4165   if (OASE) {
4166     const Expr *Base = OASE->getBase();
4167     Addr = CGF.EmitScalarExpr(Base);
4168   } else {
4169     Addr = CGF.EmitLValue(E).getPointer(CGF);
4170   }
4171   llvm::Value *SizeVal;
4172   QualType Ty = E->getType();
4173   if (OASE) {
4174     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4175     for (const Expr *SE : OASE->getDimensions()) {
4176       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4177       Sz = CGF.EmitScalarConversion(
4178           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4179       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4180     }
4181   } else if (const auto *ASE =
4182                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4183     LValue UpAddrLVal =
4184         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4185     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4186     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4187         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4188     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4189     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4190     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4191   } else {
4192     SizeVal = CGF.getTypeSize(Ty);
4193   }
4194   return std::make_pair(Addr, SizeVal);
4195 }
4196 
/// Builds the implicit kmp_task_affinity_info_t record type, if it is not
/// built yet.  The record mirrors the runtime's affinity descriptor:
///   struct kmp_task_affinity_info_t {
///     intptr_t base_addr;
///     size_t   len;
///     uint32_t flags;
///   };
/// \param KmpTaskAffinityInfoTy [in,out] Cached record type; filled on first
///        call and left untouched afterwards.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  // Flags field is a 32-bit unsigned integer.
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
4211 
/// Emits the initialization sequence for a task/taskloop/target task:
/// gathers and sorts the privatized variables, builds the task record types,
/// allocates the task via the runtime, copies shareds, initializes privates
/// and fills in the destructor/priority fields.  Returns a TaskResultTy
/// describing the allocated task for the caller to enqueue.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally record the init expression's variable.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    // Allocatable locals are stored as pointers into the privates record.
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Descending alignment order minimizes padding in the privates record;
  // stable sort keeps the relative order of equally-aligned entries.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop and plain task directives use different cached record types
  // since the taskloop variant carries extra bounds/lastiter fields.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the fourth argument of the task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null map to the proxy.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // final(expr) may be a runtime condition (pointer set) or a compile-time
  // constant (int flag); select the final bit accordingly.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // nowait tasks use the target task allocator, which takes an extra
    // device ID argument.
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Iterator-modified clauses contribute a runtime-computed count;
    // plain clauses contribute a compile-time count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized count: emit a VLA of affinity descriptors.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time count: a plain constant-sized array suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Iterator-generated elements are indexed by a runtime counter
      // continuing after the statically-filled prefix.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address::deprecated(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloop needs a task-dup routine when lastprivates exist or any
    // firstprivate requires re-initialization in the duplicated tasks.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4599 
namespace {
/// Dependence kind for RTL.
/// Values match the flag encoding the runtime expects in the `flags` field
/// of kmp_depend_info (see translateDependencyKind below).
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
  DepInOutSet = 0x8
};
/// Fields ids in kmp_depend_info record.
/// Indexes into the implicit record built by getDependTypes:
/// {intptr_t base_addr; size_t len; <flags int> flags;}.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4611 
4612 /// Translates internal dependency kind into the runtime kind.
4613 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4614   RTLDependenceKindTy DepKind;
4615   switch (K) {
4616   case OMPC_DEPEND_in:
4617     DepKind = DepIn;
4618     break;
4619   // Out and InOut dependencies must use the same code.
4620   case OMPC_DEPEND_out:
4621   case OMPC_DEPEND_inout:
4622     DepKind = DepInOut;
4623     break;
4624   case OMPC_DEPEND_mutexinoutset:
4625     DepKind = DepMutexInOutSet;
4626     break;
4627   case OMPC_DEPEND_inoutset:
4628     DepKind = DepInOutSet;
4629     break;
4630   case OMPC_DEPEND_source:
4631   case OMPC_DEPEND_sink:
4632   case OMPC_DEPEND_depobj:
4633   case OMPC_DEPEND_unknown:
4634     llvm_unreachable("Unknown task dependence type");
4635   }
4636   return DepKind;
4637 }
4638 
4639 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4640 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4641                            QualType &FlagsTy) {
4642   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4643   if (KmpDependInfoTy.isNull()) {
4644     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4645     KmpDependInfoRD->startDefinition();
4646     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4647     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4648     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4649     KmpDependInfoRD->completeDefinition();
4650     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4651   }
4652 }
4653 
/// Returns the number of dependencies stored in a depobj and an lvalue for
/// its first kmp_depend_info element.
/// \param DepobjLVal Lvalue of the depobj variable (a pointer to the
///        dependency array, offset past a header element).
/// \return pair of (number of dependencies, base lvalue of the array).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // Load the stored void* and reinterpret it as kmp_depend_info*.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // The element at index -1 is a header whose base_addr field holds the
  // number of dependencies in the array.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4681 
/// Fills the kmp_depend_info array elements for one depend clause's list of
/// expressions, possibly inside an iterator-generated loop nest.
/// \param Pos Either a compile-time index (unsigned*) advanced in place, or a
///        runtime counter lvalue (LValue*) loaded/stored per element — the
///        latter is needed when an iterator makes the count runtime-only.
/// \param Data One depend clause's kind, expressions and optional iterator.
/// \param DependenciesArray Destination array of kmp_depend_info elements.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If the clause has an iterator modifier, the body below is emitted once
  // per iteration of the generated loop nest; otherwise the scope is a no-op.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Static index: constant GEP into the array.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime index: load the counter and GEP dynamically.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance whichever position representation is in use.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4738 
/// For each depobj expression in \p Data, compute at runtime how many
/// kmp_depend_info records that depobj holds and return the per-expression
/// counts as IR values. The count is read from the hidden element stored at
/// index -1 of the depobj array (its base_addr field holds the element
/// count — see the matching write in emitDepobjDependClause).
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Iterator variables must be live while the depobj expressions are
    // evaluated; the scope ends before the final size loads below.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // The depobj variable is a pointer to the first visible element of the
      // runtime-allocated kmp_depend_info array.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT,
          CGF.ConvertTypeForMem(KmpDependInfoTy));
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // Step back one element to the hidden record that stores the count.
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Stash the count in a stack temporary so it can be re-loaded after the
      // iterator scope is destroyed.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      // NOTE(review): the zero-init + load + add below reduces to a plain
      // store of NumDeps; kept as-is to preserve the exact emitted IR.
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Reload the saved counts outside the iterator scope and return them.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4795 
/// Copy the kmp_depend_info records held by each depobj in \p Data into
/// \p DependenciesArray at the runtime position tracked by \p PosLVal,
/// advancing the position past the copied records. The per-depobj record
/// count is read from the hidden element at index -1 of the depobj array.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // Size of one kmp_depend_info record, used to scale the memcpy length.
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Keep iterator variables alive while depobj expressions are evaluated.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // The depobj variable points at the first visible record of the
      // runtime-allocated kmp_depend_info array.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT,
          CGF.ConvertTypeForMem(KmpDependInfoTy));
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4853 
/// Lower the full set of 'depend' clauses of a task-like construct into a
/// single flat kmp_depend_info array.
/// \returns {number of elements (i32, or nullptr), array address} — the pair
/// is {nullptr, invalid} when there are no dependencies at all.
/// Layout of the produced array, in order:
///   1. regular (non-depobj, non-iterator) dependencies,
///   2. regular dependencies under iterator modifiers,
///   3. records copied out of depobj dependencies.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Compile-time count: only regular deps without iterator modifiers;
  // depobj and iterator-based counts are runtime values computed below.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependecies and regular deps with the iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // Each iterator multiplies the clause's dependency list by its trip
      // count (the Upper bound expression of the iterator helper).
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size is only known at runtime: emit a VLA-backed array.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Wrap the runtime count in an OpaqueValueExpr so it can serve as the
    // size expression of a variable array type.
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static count: a plain constant-sized stack array suffices.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // 1) Regular dependencies without iterators, tracked by a host counter.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependecies with iterators.
  // 2) Switch to a runtime counter seeded with the host position.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  // 3) Bulk-copy records out of each depobj.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4977 
/// Lower the single 'depend' clause of an 'omp depobj' construct: allocate a
/// kmp_depend_info array on the heap via __kmpc_alloc, store the element
/// count in a hidden record at index 0, fill records starting at index 1,
/// and return the address of the first visible record (index 1). The hidden
/// count record is what getDepobjElements/emitDepobjElements* later read at
/// index -1, and what emitUpdateClause relies on.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Runtime count: product of all iterator trip counts (Upper bounds).
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the hidden count record, then scale by the record size.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static count: size the allocation from a constant array type.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address::deprecated(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  // Records start at index 1; slot 0 holds the hidden count.
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    // Iterator case needs a runtime counter for emitDependData.
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a void* to the first real record (skipping the count slot).
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}
5061 
/// Lower 'omp depobj(x) destroy': free the heap array backing the depobj via
/// __kmpc_free. The depobj variable points at index 1 of the allocation (the
/// first visible record), so the pointer is stepped back one element to
/// recover the true allocation start (the hidden count slot) before freeing.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  // Step back to the allocation base (must match what __kmpc_alloc returned
  // in emitDepobjDependClause).
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
5089 
/// Lower 'omp depobj(x) update(kind)': rewrite the flags field of every
/// record in the depobj's kmp_depend_info array to \p NewDepKind, using a
/// hand-built pointer-walk loop with a PHI over the current element.
/// NOTE(review): the loop body runs before the end check (do/while shape),
/// so it presumably relies on the depobj holding at least one record —
/// confirm that invariant is guaranteed by emitDepobjDependClause's callers.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  // Read the record count and the array base out of the depobj.
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI carries the current element pointer: Begin on entry, ElementNext on
  // each back edge.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Begin.withPointer(ElementPHI);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5136 
/// Emit code for an 'omp task' directive: allocate and initialize the task
/// via emitTaskInit, lower its dependencies, then either enqueue the task
/// (__kmpc_omp_task[_with_deps]) or, under a false 'if' clause, wait on the
/// dependencies and run the task body inline between
/// __kmpc_omp_task_begin_if0 / __kmpc_omp_task_complete_if0.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    // No noalias dependencies are emitted here.
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // "then" branch of the 'if' clause (or unconditional): enqueue the task.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks restart from part 0 on each resumption.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // "else" branch: 'if' evaluated false — run the task body inline (undeferred).
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    // No 'if' clause: always take the enqueue path.
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5254 
/// Emit code for an 'omp taskloop' directive: initialize the task object,
/// store the loop bounds/stride and reduction data into it, then invoke the
/// __kmpc_taskloop runtime entry. Unlike emitTaskCall, the 'if' clause is
/// passed to the runtime as an integer argument rather than branched on.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    // Absent 'if' clause behaves as if(true).
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the task's lower-bound field from the loop's LB variable.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the task's upper-bound field from the loop's UB variable.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the task's stride field from the loop's stride variable.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    // No reductions: null out the field.
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // 'sched' argument encoding expected by __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // Schedule: getInt() distinguishes num_tasks from grainsize; a null
      // pointer means no schedule clause was given.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5340 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr Optional 'x' part of an atomic update, forwarded to RedOpGen.
/// \param EExpr Optional 'expr' part of an atomic update, forwarded to
/// RedOpGen.
/// \param UpExpr Optional whole update expression, forwarded to RedOpGen.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for a zero-length array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs carry the current source/destination element pointers; the
  // back-edge incoming values are added after the loop body below.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent = Address::deprecated(
      RHSElementPHI,
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent = Address::deprecated(
      LHSElementPHI,
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the reduction for the current element pair: temporarily redirect
  // LHSVar/RHSVar to the current element addresses so RedOpGen (which refers
  // to the variables) operates on these elements.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5423 
5424 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5425 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5426 /// UDR combiner function.
5427 static void emitReductionCombiner(CodeGenFunction &CGF,
5428                                   const Expr *ReductionOp) {
5429   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5430     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5431       if (const auto *DRE =
5432               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5433         if (const auto *DRD =
5434                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5435           std::pair<llvm::Function *, llvm::Function *> Reduction =
5436               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5437           RValue Func = RValue::get(Reduction.first);
5438           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5439           CGF.EmitIgnoredExpr(ReductionOp);
5440           return;
5441         }
5442   CGF.EmitIgnoredExpr(ReductionOp);
5443 }
5444 
/// Emits the outlined reduce_func used by __kmpc_reduce{_nowait}:
///   void reduction_func(void *LHSArg, void *RHSArg)
/// where both arguments point to arrays of void* (one slot per reduction
/// item, plus an extra slot holding the size of each VLA item). The body
/// applies every ReductionOps[i] element-wise: lhs[i] = RedOp(lhs[i], rhs[i]).
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS = Address::deprecated(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType),
      CGF.getPointerAlign());
  Address RHS = Address::deprecated(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType),
      CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap every LHS/RHS variable to its slot in the corresponding array, so
  // the reduction ops below (which refer to the variables) read/write the
  // arguments. Idx may run ahead of I because VLA items consume an extra slot.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      // Bind the VLA size expression to the value loaded from the extra slot.
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit each combiner; array-typed items get an element-wise loop.
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5538 
5539 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5540                                                   const Expr *ReductionOp,
5541                                                   const Expr *PrivateRef,
5542                                                   const DeclRefExpr *LHS,
5543                                                   const DeclRefExpr *RHS) {
5544   if (PrivateRef->getType()->isArrayType()) {
5545     // Emit reduction for array section.
5546     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5547     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5548     EmitOMPAggregateReduction(
5549         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5550         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5551           emitReductionCombiner(CGF, ReductionOp);
5552         });
5553   } else {
5554     // Emit reduction for array subscript or single variable.
5555     emitReductionCombiner(CGF, ReductionOp);
5556   }
5557 }
5558 
/// Emits the full reduction protocol around __kmpc_reduce{_nowait}: builds
/// the RedList of item addresses, the outlined reduce_func, and a switch on
/// the runtime's return value (1 => direct combine + end_reduce, 2 => atomic
/// combine). With Options.SimpleReduction only the plain combiners are
/// emitted, with no runtime calls.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      // The size travels through the void* slot as an int-to-ptr cast.
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Emit all combiners, then let CommonActionTy append the end_reduce call.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Decompose 'x = <update>' assignments to find the atomic pieces.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Recompute the update from the loaded value via a private
                // temporary bound to VD.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5865 
5866 /// Generates unique name for artificial threadprivate variables.
5867 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5868 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5869                                       const Expr *Ref) {
5870   SmallString<256> Buffer;
5871   llvm::raw_svector_ostream Out(Buffer);
5872   const clang::DeclRefExpr *DE;
5873   const VarDecl *D = ::getBaseDecl(Ref, DE);
5874   if (!D)
5875     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5876   D = D->getCanonicalDecl();
5877   std::string Name = CGM.getOpenMPRuntime().getName(
5878       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5879   Out << Prefix << Name << "_"
5880       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5881   return std::string(Out.str());
5882 }
5883 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are 'void *restrict'.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg points at the private copy to initialize.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5947 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // The variables the combiner expression refers to; remapped to the
  // function arguments below.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6025 
6026 /// Emits reduction finalizer function:
6027 /// \code
6028 /// void @.red_fini(void* %arg) {
6029 /// %0 = bitcast void* %arg to <type>*
6030 /// <destroy>(<type>* %0)
6031 /// ret void
6032 /// }
6033 /// \endcode
6034 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6035                                            SourceLocation Loc,
6036                                            ReductionCodeGen &RCG, unsigned N) {
6037   if (!RCG.needCleanups(N))
6038     return nullptr;
6039   ASTContext &C = CGM.getContext();
6040   FunctionArgList Args;
6041   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6042                           ImplicitParamDecl::Other);
6043   Args.emplace_back(&Param);
6044   const auto &FnInfo =
6045       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6046   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6047   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6048   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6049                                     Name, &CGM.getModule());
6050   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6051   Fn->setDoesNotRecurse();
6052   CodeGenFunction CGF(CGM);
6053   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6054   Address PrivateAddr = CGF.EmitLoadOfPointer(
6055       CGF.GetAddrOfLocalVar(&Param),
6056       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6057   llvm::Value *Size = nullptr;
6058   // If the size of the reduction item is non-constant, load it from global
6059   // threadprivate variable.
6060   if (RCG.getSizes(N).second) {
6061     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6062         CGF, CGM.getContext().getSizeType(),
6063         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6064     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6065                                 CGM.getContext().getSizeType(), Loc);
6066   }
6067   RCG.emitAggregateType(CGF, N, Size);
6068   // Emit the finalizer body:
6069   // <destroy>(<type>* %0)
6070   RCG.emitCleanups(CGF, N, PrivateAddr);
6071   CGF.FinishFunction(Loc);
6072   return Fn;
6073 }
6074 
6075 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6076     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6077     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6078   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6079     return nullptr;
6080 
6081   // Build typedef struct:
6082   // kmp_taskred_input {
6083   //   void *reduce_shar; // shared reduction item
6084   //   void *reduce_orig; // original reduction item used for initialization
6085   //   size_t reduce_size; // size of data item
6086   //   void *reduce_init; // data initialization routine
6087   //   void *reduce_fini; // data finalization routine
6088   //   void *reduce_comb; // data combiner routine
6089   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
6090   // } kmp_taskred_input_t;
6091   ASTContext &C = CGM.getContext();
6092   RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
6093   RD->startDefinition();
6094   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6095   const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6096   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6097   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6098   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6099   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6100   const FieldDecl *FlagsFD = addFieldToRecordDecl(
6101       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6102   RD->completeDefinition();
6103   QualType RDType = C.getRecordType(RD);
6104   unsigned Size = Data.ReductionVars.size();
6105   llvm::APInt ArraySize(/*numBits=*/64, Size);
6106   QualType ArrayRDType = C.getConstantArrayType(
6107       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
6108   // kmp_task_red_input_t .rd_input.[Size];
6109   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6110   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
6111                        Data.ReductionCopies, Data.ReductionOps);
6112   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6113     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6114     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6115                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6116     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6117         TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
6118         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6119         ".rd_input.gep.");
6120     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6121     // ElemLVal.reduce_shar = &Shareds[Cnt];
6122     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6123     RCG.emitSharedOrigLValue(CGF, Cnt);
6124     llvm::Value *CastedShared =
6125         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
6126     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6127     // ElemLVal.reduce_orig = &Origs[Cnt];
6128     LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
6129     llvm::Value *CastedOrig =
6130         CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6131     CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6132     RCG.emitAggregateType(CGF, Cnt);
6133     llvm::Value *SizeValInChars;
6134     llvm::Value *SizeVal;
6135     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6136     // We use delayed creation/initialization for VLAs and array sections. It is
6137     // required because runtime does not provide the way to pass the sizes of
6138     // VLAs/array sections to initializer/combiner/finalizer functions. Instead
6139     // threadprivate global variables are used to store these values and use
6140     // them in the functions.
6141     bool DelayedCreation = !!SizeVal;
6142     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6143                                                /*isSigned=*/false);
6144     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6145     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6146     // ElemLVal.reduce_init = init;
6147     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6148     llvm::Value *InitAddr =
6149         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6150     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6151     // ElemLVal.reduce_fini = fini;
6152     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6153     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6154     llvm::Value *FiniAddr = Fini
6155                                 ? CGF.EmitCastToVoidPtr(Fini)
6156                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6157     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6158     // ElemLVal.reduce_comb = comb;
6159     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6160     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6161         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6162         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6163     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6164     // ElemLVal.flags = 0;
6165     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6166     if (DelayedCreation) {
6167       CGF.EmitStoreOfScalar(
6168           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6169           FlagsLVal);
6170     } else
6171       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6172                                  FlagsLVal.getType());
6173   }
6174   if (Data.IsReductionWithTaskMod) {
6175     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6176     // is_ws, int num, void *data);
6177     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6178     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6179                                                   CGM.IntTy, /*isSigned=*/true);
6180     llvm::Value *Args[] = {
6181         IdentTLoc, GTid,
6182         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
6183                                /*isSigned=*/true),
6184         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6185         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6186             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6187     return CGF.EmitRuntimeCall(
6188         OMPBuilder.getOrCreateRuntimeFunction(
6189             CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6190         Args);
6191   }
6192   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6193   llvm::Value *Args[] = {
6194       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6195                                 /*isSigned=*/true),
6196       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6197       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6198                                                       CGM.VoidPtrTy)};
6199   return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6200                                  CGM.getModule(), OMPRTL___kmpc_taskred_init),
6201                              Args);
6202 }
6203 
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
  // int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
6221 
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // If the size of the reduction item is non-constant (Sizes.second !=
  // nullptr), store it in an artificial threadprivate variable so that the
  // emitted initializer/combiner/finalizer functions can load it back (they
  // cannot receive it as an argument; see emitTaskReductionInit).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}
6238 
6239 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6240                                               SourceLocation Loc,
6241                                               llvm::Value *ReductionsPtr,
6242                                               LValue SharedLVal) {
6243   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6244   // *d);
6245   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6246                                                    CGM.IntTy,
6247                                                    /*isSigned=*/true),
6248                          ReductionsPtr,
6249                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6250                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6251   return Address::deprecated(
6252       CGF.EmitRuntimeCall(
6253           OMPBuilder.getOrCreateRuntimeFunction(
6254               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6255           Args),
6256       SharedLVal.getAlignment());
6257 }
6258 
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    // Emit the dependence array (empty/invalid when there are no dependences).
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    llvm::Value *DepWaitTaskArgs[6];
    if (!Data.Dependences.empty()) {
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      // No "noalias" dependences: ndeps_noalias = 0, noalias_dep_list = null.
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
      // is specified.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  // Inside an untied task region, emit the resume switch after the wait.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
6309 
6310 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6311                                            OpenMPDirectiveKind InnerKind,
6312                                            const RegionCodeGenTy &CodeGen,
6313                                            bool HasCancel) {
6314   if (!CGF.HaveInsertPoint())
6315     return;
6316   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6317                                  InnerKind != OMPD_critical &&
6318                                      InnerKind != OMPD_master &&
6319                                      InnerKind != OMPD_masked);
6320   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6321 }
6322 
namespace {
/// Cancellation kinds passed as the kmp_int32 'cncl_kind' argument of the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime calls below.
enum RTCancelKind {
  CancelNoreq = 0,     // no cancellation requested
  CancelParallel = 1,  // cancel 'parallel' region
  CancelLoop = 2,      // cancel worksharing loop ('for') region
  CancelSections = 3,  // cancel 'sections' region
  CancelTaskgroup = 4  // cancel 'taskgroup' region
};
} // anonymous namespace
6332 
6333 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6334   RTCancelKind CancelKind = CancelNoreq;
6335   if (CancelRegion == OMPD_parallel)
6336     CancelKind = CancelParallel;
6337   else if (CancelRegion == OMPD_for)
6338     CancelKind = CancelLoop;
6339   else if (CancelRegion == OMPD_sections)
6340     CancelKind = CancelSections;
6341   else {
6342     assert(CancelRegion == OMPD_taskgroup);
6343     CancelKind = CancelTaskgroup;
6344   }
6345   return CancelKind;
6346 }
6347 
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      // Branch on the (nonzero == cancelled) runtime result.
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6387 
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Codegen for the cancel itself; possibly guarded by the 'if' clause below.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      // Branch on the (nonzero == cancelled) runtime result.
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    // With an 'if' clause, the cancel call is emitted only on the true branch.
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6433 
6434 namespace {
6435 /// Cleanup action for uses_allocators support.
6436 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6437   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6438 
6439 public:
6440   OMPUsesAllocatorsActionTy(
6441       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6442       : Allocators(Allocators) {}
6443   void Enter(CodeGenFunction &CGF) override {
6444     if (!CGF.HaveInsertPoint())
6445       return;
6446     for (const auto &AllocatorData : Allocators) {
6447       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6448           CGF, AllocatorData.first, AllocatorData.second);
6449     }
6450   }
6451   void Exit(CodeGenFunction &CGF) override {
6452     if (!CGF.HaveInsertPoint())
6453       return;
6454     for (const auto &AllocatorData : Allocators) {
6455       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6456                                                         AllocatorData.first);
6457     }
6458   }
6459 };
6460 } // namespace
6461 
6462 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6463     const OMPExecutableDirective &D, StringRef ParentName,
6464     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6465     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6466   assert(!ParentName.empty() && "Invalid target region parent name!");
6467   HasEmittedTargetRegion = true;
6468   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6469   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6470     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6471       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6472       if (!D.AllocatorTraits)
6473         continue;
6474       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6475     }
6476   }
6477   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6478   CodeGen.setAction(UsesAllocatorAction);
6479   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6480                                    IsOffloadEntry, CodeGen);
6481 }
6482 
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  // Build call void *__kmpc_init_allocator(int gtid, void *memspace,
  // int ntraits, void *traits) and store the result into the allocator
  // variable.
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The number of traits is the extent of the traits constant array.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  // Reinterpret the traits array address as void** and load it as the void*
  // 'traits' argument, preserving base/TBAA info.
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // Emit the allocator variable itself, then store the converted handle.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6517 
6518 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6519                                              const Expr *Allocator) {
6520   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6521   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6522   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6523   llvm::Value *AllocatorVal =
6524       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6525   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6526                                           CGF.getContext().VoidPtrTy,
6527                                           Allocator->getExprLoc());
6528   (void)CGF.EmitRuntimeCall(
6529       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6530                                             OMPRTL___kmpc_destroy_allocator),
6531       {ThreadId, AllocatorVal});
6532 }
6533 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  // The host function is only skipped when offload is mandatory on the host
  // side; in that case a named placeholder global is emitted instead (below).
  const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
                               !CGM.getLangOpts().OpenMPOffloadMandatory;
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  if (BuildOutlinedFn)
    OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // On the host the ID is a uniquely named constant byte.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // If we do not allow host fallback we still need a named address to use.
  llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
  if (!BuildOutlinedFn) {
    assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&
           "Named kernel already exists?");
    TargetRegionEntryAddr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::InternalLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  if (BuildOutlinedFn)
    CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}
6633 
6634 /// Checks if the expression is constant or does not have non-trivial function
6635 /// calls.
6636 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6637   // We can skip constant expressions.
6638   // We can skip expressions with trivial calls or simple expressions.
6639   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6640           !E->hasNonTrivialCall(Ctx)) &&
6641          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6642 }
6643 
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  // Drill through compound statements, returning the single "interesting"
  // child if there is exactly one, or nullptr if there are several.
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Trivial expressions (constant / no non-trivial calls, no side effects)
      // do not count as children.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A DeclStmt is ignorable if every declaration in it is: a non-variable
        // declaration, a global variable, or an unused local.
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Unwrap the surviving child and loop in case it is itself a compound.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6685 
/// Returns the expression that determines the number of teams for the given
/// target-based directive, or null if none applies. \p DefaultVal is set to
/// the constant value when it can be deduced (-1 is left untouched to signal
/// "no teams region needed" for a plain 'target' with no nested directive).
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', inspect the single nested directive (if any) to
    // deduce the number of teams.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Use the nested teams directive's num_teams clause; also record
          // the value in DefaultVal if it folds to a constant.
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              DefaultVal = Constant->getExtValue();
          return NumTeams;
        }
        // Nested teams without num_teams: let the runtime decide (0).
        DefaultVal = 0;
        return nullptr;
      }
      // Any other nested directive executes with a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        DefaultVal = 1;
        return nullptr;
      }
      DefaultVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    DefaultVal = -1;
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams directives: the num_teams clause (if present) is
    // on the directive itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return NumTeams;
    }
    DefaultVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // These directives always execute with exactly one team.
    DefaultVal = 1;
    return nullptr;
  // All remaining directive kinds are not target execution directives and are
  // rejected by the assertion above; they are listed so the switch stays
  // exhaustive over OpenMPDirectiveKind.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6814 
/// Emits the host-side i32 value for the number of teams of a target-based
/// directive. Returns null when no teams region needs to be emitted (the -1
/// default from getNumTeamsExprForTargetDirective).
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t DefaultNT = -1;
  const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      // The num_teams expression came from a nested teams directive; evaluate
      // it in the context of the captured statement so captured variables
      // resolve correctly.
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                  /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                             /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      // Clause is on the directive itself; evaluate it directly with its own
      // cleanup scope.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                  /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                             /*isSigned=*/true);
    }
    default:
      break;
    }
  } else if (DefaultNT == -1) {
    // No teams region is required for this directive.
    return nullptr;
  }

  return Bld.getInt32(DefaultNT);
}
6856 
/// Computes the number of threads for the innermost parallel region nested
/// directly inside \p CS, clamped by \p DefaultThreadLimitVal when given.
/// Returns null if no nested parallel region was found; returns
/// DefaultThreadLimitVal (or 0 meaning "runtime default") otherwise.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Pick the if clause that applies to 'parallel' (either unmodified or
        // with an explicit 'parallel' name modifier).
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: the region runs serial (1 thread).
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            // Non-constant condition: emit any pre-init declarations the
            // clause captured, then evaluate the condition at runtime.
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit pre-init declarations captured by the num_threads clause.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp to the enclosing thread_limit: min(limit, num_threads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: inherit the limit, or 0 = runtime default.
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A simd region executes with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  // No nested directive: use the limit, or 0 = runtime default.
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6948 
6949 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6950     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6951     int32_t &DefaultVal) {
6952   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6953   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6954          "Expected target-based executable directive.");
6955 
6956   switch (DirectiveKind) {
6957   case OMPD_target:
6958     // Teams have no clause thread_limit
6959     return nullptr;
6960   case OMPD_target_teams:
6961   case OMPD_target_teams_distribute:
6962     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6963       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6964       const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6965       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6966         if (auto Constant =
6967                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6968           DefaultVal = Constant->getExtValue();
6969       return ThreadLimit;
6970     }
6971     return nullptr;
6972   case OMPD_target_parallel:
6973   case OMPD_target_parallel_for:
6974   case OMPD_target_parallel_for_simd:
6975   case OMPD_target_teams_distribute_parallel_for:
6976   case OMPD_target_teams_distribute_parallel_for_simd: {
6977     Expr *ThreadLimit = nullptr;
6978     Expr *NumThreads = nullptr;
6979     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6980       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6981       ThreadLimit = ThreadLimitClause->getThreadLimit();
6982       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6983         if (auto Constant =
6984                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6985           DefaultVal = Constant->getExtValue();
6986     }
6987     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6988       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6989       NumThreads = NumThreadsClause->getNumThreads();
6990       if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6991         if (auto Constant =
6992                 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6993           if (Constant->getExtValue() < DefaultVal) {
6994             DefaultVal = Constant->getExtValue();
6995             ThreadLimit = NumThreads;
6996           }
6997         }
6998       }
6999     }
7000     return ThreadLimit;
7001   }
7002   case OMPD_target_teams_distribute_simd:
7003   case OMPD_target_simd:
7004     DefaultVal = 1;
7005     return nullptr;
7006   case OMPD_parallel:
7007   case OMPD_for:
7008   case OMPD_parallel_for:
7009   case OMPD_parallel_master:
7010   case OMPD_parallel_sections:
7011   case OMPD_for_simd:
7012   case OMPD_parallel_for_simd:
7013   case OMPD_cancel:
7014   case OMPD_cancellation_point:
7015   case OMPD_ordered:
7016   case OMPD_threadprivate:
7017   case OMPD_allocate:
7018   case OMPD_task:
7019   case OMPD_simd:
7020   case OMPD_tile:
7021   case OMPD_unroll:
7022   case OMPD_sections:
7023   case OMPD_section:
7024   case OMPD_single:
7025   case OMPD_master:
7026   case OMPD_critical:
7027   case OMPD_taskyield:
7028   case OMPD_barrier:
7029   case OMPD_taskwait:
7030   case OMPD_taskgroup:
7031   case OMPD_atomic:
7032   case OMPD_flush:
7033   case OMPD_depobj:
7034   case OMPD_scan:
7035   case OMPD_teams:
7036   case OMPD_target_data:
7037   case OMPD_target_exit_data:
7038   case OMPD_target_enter_data:
7039   case OMPD_distribute:
7040   case OMPD_distribute_simd:
7041   case OMPD_distribute_parallel_for:
7042   case OMPD_distribute_parallel_for_simd:
7043   case OMPD_teams_distribute:
7044   case OMPD_teams_distribute_simd:
7045   case OMPD_teams_distribute_parallel_for:
7046   case OMPD_teams_distribute_parallel_for_simd:
7047   case OMPD_target_update:
7048   case OMPD_declare_simd:
7049   case OMPD_declare_variant:
7050   case OMPD_begin_declare_variant:
7051   case OMPD_end_declare_variant:
7052   case OMPD_declare_target:
7053   case OMPD_end_declare_target:
7054   case OMPD_declare_reduction:
7055   case OMPD_declare_mapper:
7056   case OMPD_taskloop:
7057   case OMPD_taskloop_simd:
7058   case OMPD_master_taskloop:
7059   case OMPD_master_taskloop_simd:
7060   case OMPD_parallel_master_taskloop:
7061   case OMPD_parallel_master_taskloop_simd:
7062   case OMPD_requires:
7063   case OMPD_unknown:
7064     break;
7065   default:
7066     break;
7067   }
7068   llvm_unreachable("Unsupported directive kind.");
7069 }
7070 
/// Emits the host-side i32 value for the number of threads of a target-based
/// directive, combining thread_limit, num_threads, and if clauses as well as
/// nested directives. A result of 0 means "use the runtime default".
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', deduce the value from whatever is nested inside.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested thread_limit clause bounds the result; emit its pre-init
      // declarations, then the limit expression itself.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a nested teams (non-combined) directive, descend one more level
      // to find the directive nested inside the teams region.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A distribute (non-simd) region may contain a nested parallel region
      // that determines the number of threads.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A simd region runs with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // thread_limit on the directive itself, combined with whatever is nested.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Look through a nested plain 'distribute' for a parallel region.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Pick the if clause that applies to 'parallel' (unmodified or with an
      // explicit 'parallel' name modifier).
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false: the region runs with one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine: min(num_threads, thread_limit) when both are present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // Apply the runtime-evaluated if condition: <cond> ? <limit> : 1.
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd regions always execute with a single thread.
    return Bld.getInt32(1);
  // All remaining directive kinds are not target execution directives and are
  // rejected by the assertion above; they are listed so the switch stays
  // exhaustive over OpenMPDirectiveKind.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7289 
7290 namespace {
7291 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7292 
7293 // Utility to handle information from clauses associated with a given
7294 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7295 // It provides a convenient interface to obtain the information and generate
7296 // code for that information.
7297 class MappableExprsHandler {
7298 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. These must stay in sync with the flag values expected by the
  /// offloading runtime (see the omptarget map-type bits).
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC (deliberately unused
    /// here).
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    // Increment and decrement a separate reference counter so that the data
    // cannot be unmapped within the associated region.  Thus, this flag is
    // intended to be used on 'target' and 'target data' directives because they
    // are inherently structured.  It is not intended to be used on 'target
    // enter data' and 'target exit data' directives because they are inherently
    // dynamic.
    // This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7354 
7355   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7356   static unsigned getFlagMemberOffset() {
7357     unsigned Offset = 0;
7358     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7359          Remain = Remain >> 1)
7360       Offset++;
7361     return Offset;
7362   }
7363 
  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    /// \p MapExpr may be omitted when the mapping has no associated clause
    /// expression (e.g. implicit maps).
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };
7380 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Dereference-style accessor for the underlying base pointer.
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
7397 
  // Convenience aliases for the parallel per-argument arrays that make up the
  // offloading information handed to the runtime library. Entry i of each
  // array describes the same map argument.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7405 
7406   /// This structure contains combined information generated for mappable
7407   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7408   /// mappers, and non-contiguous information.
7409   struct MapCombinedInfoTy {
7410     struct StructNonContiguousInfo {
7411       bool IsNonContiguous = false;
7412       MapDimArrayTy Dims;
7413       MapNonContiguousArrayTy Offsets;
7414       MapNonContiguousArrayTy Counts;
7415       MapNonContiguousArrayTy Strides;
7416     };
7417     MapExprsArrayTy Exprs;
7418     MapBaseValuesArrayTy BasePointers;
7419     MapValuesArrayTy Pointers;
7420     MapValuesArrayTy Sizes;
7421     MapFlagsArrayTy Types;
7422     MapMappersArrayTy Mappers;
7423     StructNonContiguousInfo NonContigInfo;
7424 
7425     /// Append arrays in \a CurInfo.
7426     void append(MapCombinedInfoTy &CurInfo) {
7427       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7428       BasePointers.append(CurInfo.BasePointers.begin(),
7429                           CurInfo.BasePointers.end());
7430       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7431       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7432       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7433       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7434       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7435                                  CurInfo.NonContigInfo.Dims.end());
7436       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7437                                     CurInfo.NonContigInfo.Offsets.end());
7438       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7439                                    CurInfo.NonContigInfo.Counts.end());
7440       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7441                                     CurInfo.NonContigInfo.Strides.end());
7442     }
7443   };
7444 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Map entries gathered before the struct itself is processed
    // (presumably emitted ahead of the combined entry — confirm at use sites).
    MapCombinedInfoTy PreliminaryMapData;
    // Field index and address of the lowest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Field index and address of the highest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Address of the struct itself (base of the combined entry).
    Address Base = Address::invalid();
    // Lower-bound address of the combined entry — TODO confirm vs. Base.
    Address LB = Address::invalid();
    // True if the mapped element range comes from an array section.
    bool IsArraySection = false;
    // True if the whole record has been mapped.
    bool HasCompleteRecord = false;
  };
7460 
private:
  /// Everything extracted from a single component list of a map (or motion)
  /// clause: the components themselves, the map type and its modifiers, and
  /// whether a device pointer has to be returned for it.
  struct MapInfo {
    /// The component list this information was extracted from.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type (to/from/tofrom/alloc/release/delete) of the clause.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Modifiers (always, close, present, ...) of a map clause.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Modifiers (e.g. present) of a 'to'/'from' motion clause.
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// True if the address of the entity must be returned to the caller
    /// (use_device_ptr/use_device_addr handling — confirm at use sites).
    bool ReturnDevicePointer = false;
    /// True if the map was added implicitly rather than written by the user.
    bool IsImplicit = false;
    /// User-defined mapper associated with the clause, if any.
    const ValueDecl *Mapper = nullptr;
    /// Original variable-reference expression of the clause, if any.
    const Expr *VarRef = nullptr;
    /// True when generated for use_device_addr rather than use_device_ptr.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7488 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression the use_device_ptr/use_device_addr clause refers to.
    const Expr *IE = nullptr;
    /// Declaration of the device pointer/address.
    const ValueDecl *VD = nullptr;
    /// True for use_device_addr, false for use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7501 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and the map clause that mentions them.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7525 
  /// Emit IR computing the size in bytes of the entity designated by \p E:
  /// a plain expression, an array shaping expression, or an array section.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression:
    // sizeof(pointee) * dim0 * dim1 * ...
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        // Each dimension is widened/converted to size_t before multiplying.
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base, e.g. a[:]: the size is just sizeof(base type).
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Element size, whether the base is a pointer or an array.
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element, e.g. a[i]: size is one element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length, e.g. a[lb:len]: size = len * sizeof(element).
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Select 0 if lb*elemsize exceeds the base size, so the computed
      // length never wraps below zero.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    // Any other expression: the static size of its (canonical) type.
    return CGF.getTypeSize(ExprTy);
  }
7600 
7601   /// Return the corresponding bits for a given map clause modifier. Add
7602   /// a flag marking the map as a pointer if requested. Add a flag marking the
7603   /// map as the first one of a series of maps that relate to the same map
7604   /// expression.
7605   OpenMPOffloadMappingFlags getMapTypeBits(
7606       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7607       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7608       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7609     OpenMPOffloadMappingFlags Bits =
7610         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7611     switch (MapType) {
7612     case OMPC_MAP_alloc:
7613     case OMPC_MAP_release:
7614       // alloc and release is the default behavior in the runtime library,  i.e.
7615       // if we don't pass any bits alloc/release that is what the runtime is
7616       // going to do. Therefore, we don't need to signal anything for these two
7617       // type modifiers.
7618       break;
7619     case OMPC_MAP_to:
7620       Bits |= OMP_MAP_TO;
7621       break;
7622     case OMPC_MAP_from:
7623       Bits |= OMP_MAP_FROM;
7624       break;
7625     case OMPC_MAP_tofrom:
7626       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7627       break;
7628     case OMPC_MAP_delete:
7629       Bits |= OMP_MAP_DELETE;
7630       break;
7631     case OMPC_MAP_unknown:
7632       llvm_unreachable("Unexpected map type!");
7633     }
7634     if (AddPtrFlag)
7635       Bits |= OMP_MAP_PTR_AND_OBJ;
7636     if (AddIsTargetParamFlag)
7637       Bits |= OMP_MAP_TARGET_PARAM;
7638     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7639       Bits |= OMP_MAP_ALWAYS;
7640     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7641       Bits |= OMP_MAP_CLOSE;
7642     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7643         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7644       Bits |= OMP_MAP_PRESENT;
7645     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7646       Bits |= OMP_MAP_OMPX_HOLD;
7647     if (IsNonContiguous)
7648       Bits |= OMP_MAP_NON_CONTIG;
7649     return Bits;
7650   }
7651 
7652   /// Return true if the provided expression is a final array section. A
7653   /// final array section, is one whose length can't be proved to be one.
7654   bool isFinalArraySectionExpression(const Expr *E) const {
7655     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7656 
7657     // It is not an array section and therefore not a unity-size one.
7658     if (!OASE)
7659       return false;
7660 
7661     // An array section with no colon always refer to a single element.
7662     if (OASE->getColonLocFirst().isInvalid())
7663       return false;
7664 
7665     const Expr *Length = OASE->getLength();
7666 
7667     // If we don't have a length we have to check if the array has size 1
7668     // for this dimension. Also, we should always expect a length if the
7669     // base type is pointer.
7670     if (!Length) {
7671       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7672                              OASE->getBase()->IgnoreParenImpCasts())
7673                              .getCanonicalType();
7674       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7675         return ATy->getSize().getSExtValue() != 1;
7676       // If we don't have a constant dimension length, we have to consider
7677       // the current section as having any size, so it is not necessarily
7678       // unitary. If it happen to be unity size, that's user fault.
7679       return true;
7680     }
7681 
7682     // Check if the length evaluates to 1.
7683     Expr::EvalResult Result;
7684     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7685       return true; // Can have more that size 1.
7686 
7687     llvm::APSInt ConstLength = Result.Val.getInt();
7688     return ConstLength.getSExtValue() != 1;
7689   }
7690 
7691   /// Generate the base pointers, section pointers, sizes, map type bits, and
7692   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7693   /// map type, map or motion modifiers, and expression components.
7694   /// \a IsFirstComponent should be set to true if the provided set of
7695   /// components is the first associated with a capture.
7696   void generateInfoForComponentList(
7697       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7698       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7699       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7700       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7701       bool IsFirstComponentList, bool IsImplicit,
7702       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7703       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7704       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7705           OverlappedElements = llvm::None) const {
7706     // The following summarizes what has to be generated for each map and the
7707     // types below. The generated information is expressed in this order:
7708     // base pointer, section pointer, size, flags
7709     // (to add to the ones that come from the map type and modifier).
7710     //
7711     // double d;
7712     // int i[100];
7713     // float *p;
7714     //
7715     // struct S1 {
7716     //   int i;
7717     //   float f[50];
7718     // }
7719     // struct S2 {
7720     //   int i;
7721     //   float f[50];
7722     //   S1 s;
7723     //   double *p;
7724     //   struct S2 *ps;
7725     //   int &ref;
7726     // }
7727     // S2 s;
7728     // S2 *ps;
7729     //
7730     // map(d)
7731     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7732     //
7733     // map(i)
7734     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7735     //
7736     // map(i[1:23])
7737     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7738     //
7739     // map(p)
7740     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7741     //
7742     // map(p[1:24])
7743     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7744     // in unified shared memory mode or for local pointers
7745     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7746     //
7747     // map(s)
7748     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7749     //
7750     // map(s.i)
7751     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7752     //
7753     // map(s.s.f)
7754     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7755     //
7756     // map(s.p)
7757     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7758     //
7759     // map(to: s.p[:22])
7760     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7761     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7762     // &(s.p), &(s.p[0]), 22*sizeof(double),
7763     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7764     // (*) alloc space for struct members, only this is a target parameter
7765     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7766     //      optimizes this entry out, same in the examples below)
7767     // (***) map the pointee (map: to)
7768     //
7769     // map(to: s.ref)
7770     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7771     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7772     // (*) alloc space for struct members, only this is a target parameter
7773     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7774     //      optimizes this entry out, same in the examples below)
7775     // (***) map the pointee (map: to)
7776     //
7777     // map(s.ps)
7778     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7779     //
7780     // map(from: s.ps->s.i)
7781     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7782     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7783     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7784     //
7785     // map(to: s.ps->ps)
7786     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7787     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7788     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7789     //
7790     // map(s.ps->ps->ps)
7791     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7792     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7793     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7794     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7795     //
7796     // map(to: s.ps->ps->s.f[:22])
7797     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7798     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7799     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7800     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7801     //
7802     // map(ps)
7803     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7804     //
7805     // map(ps->i)
7806     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7807     //
7808     // map(ps->s.f)
7809     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7810     //
7811     // map(from: ps->p)
7812     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7813     //
7814     // map(to: ps->p[:22])
7815     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7816     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7817     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7818     //
7819     // map(ps->ps)
7820     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7821     //
7822     // map(from: ps->ps->s.i)
7823     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7824     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7825     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7826     //
7827     // map(from: ps->ps->ps)
7828     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7829     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7830     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7831     //
7832     // map(ps->ps->ps->ps)
7833     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7834     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7835     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7836     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7837     //
7838     // map(to: ps->ps->ps->s.f[:22])
7839     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7840     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7841     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7842     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7843     //
7844     // map(to: s.f[:22]) map(from: s.p[:33])
7845     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7846     //     sizeof(double*) (**), TARGET_PARAM
7847     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7848     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7849     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7850     // (*) allocate contiguous space needed to fit all mapped members even if
7851     //     we allocate space for members not mapped (in this example,
7852     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7853     //     them as well because they fall between &s.f[0] and &s.p)
7854     //
7855     // map(from: s.f[:22]) map(to: ps->p[:33])
7856     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7857     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7858     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7859     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7860     // (*) the struct this entry pertains to is the 2nd element in the list of
7861     //     arguments, hence MEMBER_OF(2)
7862     //
7863     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7864     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7865     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7866     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7867     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7868     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7869     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7870     // (*) the struct this entry pertains to is the 4th element in the list
7871     //     of arguments, hence MEMBER_OF(4)
7872 
7873     // Track if the map information being generated is the first for a capture.
7874     bool IsCaptureFirstInfo = IsFirstComponentList;
7875     // When the variable is on a declare target link or in a to clause with
7876     // unified memory, a reference is needed to hold the host/device address
7877     // of the variable.
7878     bool RequiresReference = false;
7879 
7880     // Scan the components from the base to the complete expression.
7881     auto CI = Components.rbegin();
7882     auto CE = Components.rend();
7883     auto I = CI;
7884 
7885     // Track if the map information being generated is the first for a list of
7886     // components.
7887     bool IsExpressionFirstInfo = true;
7888     bool FirstPointerInComplexData = false;
7889     Address BP = Address::invalid();
7890     const Expr *AssocExpr = I->getAssociatedExpression();
7891     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7892     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7893     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7894 
7895     if (isa<MemberExpr>(AssocExpr)) {
7896       // The base is the 'this' pointer. The content of the pointer is going
7897       // to be the base of the field being mapped.
7898       BP = CGF.LoadCXXThisAddress();
7899     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7900                (OASE &&
7901                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7902       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7903     } else if (OAShE &&
7904                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7905       BP = Address::deprecated(
7906           CGF.EmitScalarExpr(OAShE->getBase()),
7907           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7908     } else {
7909       // The base is the reference to the variable.
7910       // BP = &Var.
7911       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7912       if (const auto *VD =
7913               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7914         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7915                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7916           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7917               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7918                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7919             RequiresReference = true;
7920             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7921           }
7922         }
7923       }
7924 
7925       // If the variable is a pointer and is being dereferenced (i.e. is not
7926       // the last component), the base has to be the pointer itself, not its
7927       // reference. References are ignored for mapping purposes.
7928       QualType Ty =
7929           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7930       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7931         // No need to generate individual map information for the pointer, it
7932         // can be associated with the combined storage if shared memory mode is
7933         // active or the base declaration is not global variable.
7934         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7935         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7936             !VD || VD->hasLocalStorage())
7937           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7938         else
7939           FirstPointerInComplexData = true;
7940         ++I;
7941       }
7942     }
7943 
7944     // Track whether a component of the list should be marked as MEMBER_OF some
7945     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7946     // in a component list should be marked as MEMBER_OF, all subsequent entries
7947     // do not belong to the base struct. E.g.
7948     // struct S2 s;
7949     // s.ps->ps->ps->f[:]
7950     //   (1) (2) (3) (4)
7951     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7952     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7953     // is the pointee of ps(2) which is not member of struct s, so it should not
7954     // be marked as such (it is still PTR_AND_OBJ).
7955     // The variable is initialized to false so that PTR_AND_OBJ entries which
7956     // are not struct members are not considered (e.g. array of pointers to
7957     // data).
7958     bool ShouldBeMemberOf = false;
7959 
7960     // Variable keeping track of whether or not we have encountered a component
7961     // in the component list which is a member expression. Useful when we have a
7962     // pointer or a final array section, in which case it is the previous
7963     // component in the list which tells us whether we have a member expression.
7964     // E.g. X.f[:]
7965     // While processing the final array section "[:]" it is "f" which tells us
7966     // whether we are dealing with a member of a declared struct.
7967     const MemberExpr *EncounteredME = nullptr;
7968 
7969     // Track for the total number of dimension. Start from one for the dummy
7970     // dimension.
7971     uint64_t DimSize = 1;
7972 
7973     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7974     bool IsPrevMemberReference = false;
7975 
7976     for (; I != CE; ++I) {
7977       // If the current component is member of a struct (parent struct) mark it.
7978       if (!EncounteredME) {
7979         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7980         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7981         // as MEMBER_OF the parent struct.
7982         if (EncounteredME) {
7983           ShouldBeMemberOf = true;
7984           // Do not emit as complex pointer if this is actually not array-like
7985           // expression.
7986           if (FirstPointerInComplexData) {
7987             QualType Ty = std::prev(I)
7988                               ->getAssociatedDeclaration()
7989                               ->getType()
7990                               .getNonReferenceType();
7991             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7992             FirstPointerInComplexData = false;
7993           }
7994         }
7995       }
7996 
7997       auto Next = std::next(I);
7998 
7999       // We need to generate the addresses and sizes if this is the last
8000       // component, if the component is a pointer or if it is an array section
8001       // whose length can't be proved to be one. If this is a pointer, it
8002       // becomes the base address for the following components.
8003 
8004       // A final array section, is one whose length can't be proved to be one.
8005       // If the map item is non-contiguous then we don't treat any array section
8006       // as final array section.
8007       bool IsFinalArraySection =
8008           !IsNonContiguous &&
8009           isFinalArraySectionExpression(I->getAssociatedExpression());
8010 
8011       // If we have a declaration for the mapping use that, otherwise use
8012       // the base declaration of the map clause.
8013       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
8014                                      ? I->getAssociatedDeclaration()
8015                                      : BaseDecl;
8016       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
8017                                                : MapExpr;
8018 
8019       // Get information on whether the element is a pointer. Have to do a
8020       // special treatment for array sections given that they are built-in
8021       // types.
8022       const auto *OASE =
8023           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
8024       const auto *OAShE =
8025           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
8026       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
8027       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8028       bool IsPointer =
8029           OAShE ||
8030           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
8031                        .getCanonicalType()
8032                        ->isAnyPointerType()) ||
8033           I->getAssociatedExpression()->getType()->isAnyPointerType();
8034       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
8035                                MapDecl &&
8036                                MapDecl->getType()->isLValueReferenceType();
8037       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
8038 
8039       if (OASE)
8040         ++DimSize;
8041 
8042       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8043           IsFinalArraySection) {
8044         // If this is not the last component, we expect the pointer to be
8045         // associated with an array expression or member expression.
8046         assert((Next == CE ||
8047                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8048                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8049                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
8050                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8051                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8052                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8053                "Unexpected expression");
8054 
8055         Address LB = Address::invalid();
8056         Address LowestElem = Address::invalid();
8057         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8058                                        const MemberExpr *E) {
8059           const Expr *BaseExpr = E->getBase();
8060           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
8061           // scalar.
8062           LValue BaseLV;
8063           if (E->isArrow()) {
8064             LValueBaseInfo BaseInfo;
8065             TBAAAccessInfo TBAAInfo;
8066             Address Addr =
8067                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8068             QualType PtrTy = BaseExpr->getType()->getPointeeType();
8069             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8070           } else {
8071             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8072           }
8073           return BaseLV;
8074         };
8075         if (OAShE) {
8076           LowestElem = LB =
8077               Address::deprecated(CGF.EmitScalarExpr(OAShE->getBase()),
8078                                   CGF.getContext().getTypeAlignInChars(
8079                                       OAShE->getBase()->getType()));
8080         } else if (IsMemberReference) {
8081           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8082           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8083           LowestElem = CGF.EmitLValueForFieldInitialization(
8084                               BaseLVal, cast<FieldDecl>(MapDecl))
8085                            .getAddress(CGF);
8086           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8087                    .getAddress(CGF);
8088         } else {
8089           LowestElem = LB =
8090               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8091                   .getAddress(CGF);
8092         }
8093 
8094         // If this component is a pointer inside the base struct then we don't
8095         // need to create any entry for it - it will be combined with the object
8096         // it is pointing to into a single PTR_AND_OBJ entry.
8097         bool IsMemberPointerOrAddr =
8098             EncounteredME &&
8099             (((IsPointer || ForDeviceAddr) &&
8100               I->getAssociatedExpression() == EncounteredME) ||
8101              (IsPrevMemberReference && !IsPointer) ||
8102              (IsMemberReference && Next != CE &&
8103               !Next->getAssociatedExpression()->getType()->isPointerType()));
8104         if (!OverlappedElements.empty() && Next == CE) {
8105           // Handle base element with the info for overlapped elements.
8106           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8107           assert(!IsPointer &&
8108                  "Unexpected base element with the pointer type.");
8109           // Mark the whole struct as the struct that requires allocation on the
8110           // device.
8111           PartialStruct.LowestElem = {0, LowestElem};
8112           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8113               I->getAssociatedExpression()->getType());
8114           Address HB = CGF.Builder.CreateConstGEP(
8115               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8116                   LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
8117               TypeSize.getQuantity() - 1);
8118           PartialStruct.HighestElem = {
8119               std::numeric_limits<decltype(
8120                   PartialStruct.HighestElem.first)>::max(),
8121               HB};
8122           PartialStruct.Base = BP;
8123           PartialStruct.LB = LB;
8124           assert(
8125               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8126               "Overlapped elements must be used only once for the variable.");
8127           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8128           // Emit data for non-overlapped data.
8129           OpenMPOffloadMappingFlags Flags =
8130               OMP_MAP_MEMBER_OF |
8131               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8132                              /*AddPtrFlag=*/false,
8133                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8134           llvm::Value *Size = nullptr;
8135           // Do bitcopy of all non-overlapped structure elements.
8136           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8137                    Component : OverlappedElements) {
8138             Address ComponentLB = Address::invalid();
8139             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8140                  Component) {
8141               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8142                 const auto *FD = dyn_cast<FieldDecl>(VD);
8143                 if (FD && FD->getType()->isLValueReferenceType()) {
8144                   const auto *ME =
8145                       cast<MemberExpr>(MC.getAssociatedExpression());
8146                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8147                   ComponentLB =
8148                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8149                           .getAddress(CGF);
8150                 } else {
8151                   ComponentLB =
8152                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8153                           .getAddress(CGF);
8154                 }
8155                 Size = CGF.Builder.CreatePtrDiff(
8156                     CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8157                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8158                 break;
8159               }
8160             }
8161             assert(Size && "Failed to determine structure size");
8162             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8163             CombinedInfo.BasePointers.push_back(BP.getPointer());
8164             CombinedInfo.Pointers.push_back(LB.getPointer());
8165             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8166                 Size, CGF.Int64Ty, /*isSigned=*/true));
8167             CombinedInfo.Types.push_back(Flags);
8168             CombinedInfo.Mappers.push_back(nullptr);
8169             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8170                                                                       : 1);
8171             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8172           }
8173           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8174           CombinedInfo.BasePointers.push_back(BP.getPointer());
8175           CombinedInfo.Pointers.push_back(LB.getPointer());
8176           Size = CGF.Builder.CreatePtrDiff(
8177               CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8178               CGF.EmitCastToVoidPtr(LB.getPointer()));
8179           CombinedInfo.Sizes.push_back(
8180               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8181           CombinedInfo.Types.push_back(Flags);
8182           CombinedInfo.Mappers.push_back(nullptr);
8183           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8184                                                                     : 1);
8185           break;
8186         }
8187         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8188         if (!IsMemberPointerOrAddr ||
8189             (Next == CE && MapType != OMPC_MAP_unknown)) {
8190           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8191           CombinedInfo.BasePointers.push_back(BP.getPointer());
8192           CombinedInfo.Pointers.push_back(LB.getPointer());
8193           CombinedInfo.Sizes.push_back(
8194               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8195           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8196                                                                     : 1);
8197 
8198           // If Mapper is valid, the last component inherits the mapper.
8199           bool HasMapper = Mapper && Next == CE;
8200           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8201 
8202           // We need to add a pointer flag for each map that comes from the
8203           // same expression except for the first one. We also need to signal
8204           // this map is the first one that relates with the current capture
8205           // (there is a set of entries for each capture).
8206           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8207               MapType, MapModifiers, MotionModifiers, IsImplicit,
8208               !IsExpressionFirstInfo || RequiresReference ||
8209                   FirstPointerInComplexData || IsMemberReference,
8210               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8211 
8212           if (!IsExpressionFirstInfo || IsMemberReference) {
8213             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8214             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8215             if (IsPointer || (IsMemberReference && Next != CE))
8216               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8217                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8218 
8219             if (ShouldBeMemberOf) {
8220               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8221               // should be later updated with the correct value of MEMBER_OF.
8222               Flags |= OMP_MAP_MEMBER_OF;
8223               // From now on, all subsequent PTR_AND_OBJ entries should not be
8224               // marked as MEMBER_OF.
8225               ShouldBeMemberOf = false;
8226             }
8227           }
8228 
8229           CombinedInfo.Types.push_back(Flags);
8230         }
8231 
8232         // If we have encountered a member expression so far, keep track of the
8233         // mapped member. If the parent is "*this", then the value declaration
8234         // is nullptr.
8235         if (EncounteredME) {
8236           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8237           unsigned FieldIndex = FD->getFieldIndex();
8238 
8239           // Update info about the lowest and highest elements for this struct
8240           if (!PartialStruct.Base.isValid()) {
8241             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8242             if (IsFinalArraySection) {
8243               Address HB =
8244                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8245                       .getAddress(CGF);
8246               PartialStruct.HighestElem = {FieldIndex, HB};
8247             } else {
8248               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8249             }
8250             PartialStruct.Base = BP;
8251             PartialStruct.LB = BP;
8252           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8253             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8254           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8255             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8256           }
8257         }
8258 
8259         // Need to emit combined struct for array sections.
8260         if (IsFinalArraySection || IsNonContiguous)
8261           PartialStruct.IsArraySection = true;
8262 
8263         // If we have a final array section, we are done with this expression.
8264         if (IsFinalArraySection)
8265           break;
8266 
8267         // The pointer becomes the base for the next element.
8268         if (Next != CE)
8269           BP = IsMemberReference ? LowestElem : LB;
8270 
8271         IsExpressionFirstInfo = false;
8272         IsCaptureFirstInfo = false;
8273         FirstPointerInComplexData = false;
8274         IsPrevMemberReference = IsMemberReference;
8275       } else if (FirstPointerInComplexData) {
8276         QualType Ty = Components.rbegin()
8277                           ->getAssociatedDeclaration()
8278                           ->getType()
8279                           .getNonReferenceType();
8280         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8281         FirstPointerInComplexData = false;
8282       }
8283     }
8284     // If ran into the whole component - allocate the space for the whole
8285     // record.
8286     if (!EncounteredME)
8287       PartialStruct.HasCompleteRecord = true;
8288 
8289     if (!IsNonContiguous)
8290       return;
8291 
8292     const ASTContext &Context = CGF.getContext();
8293 
8294     // For supporting stride in array section, we need to initialize the first
8295     // dimension size as 1, first offset as 0, and first count as 1
8296     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8297     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8298     MapValuesArrayTy CurStrides;
8299     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8300     uint64_t ElementTypeSize;
8301 
8302     // Collect Size information for each dimension and get the element size as
8303     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8304     // should be [10, 10] and the first stride is 4 btyes.
8305     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8306          Components) {
8307       const Expr *AssocExpr = Component.getAssociatedExpression();
8308       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8309 
8310       if (!OASE)
8311         continue;
8312 
8313       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8314       auto *CAT = Context.getAsConstantArrayType(Ty);
8315       auto *VAT = Context.getAsVariableArrayType(Ty);
8316 
8317       // We need all the dimension size except for the last dimension.
8318       assert((VAT || CAT || &Component == &*Components.begin()) &&
8319              "Should be either ConstantArray or VariableArray if not the "
8320              "first Component");
8321 
8322       // Get element size if CurStrides is empty.
8323       if (CurStrides.empty()) {
8324         const Type *ElementType = nullptr;
8325         if (CAT)
8326           ElementType = CAT->getElementType().getTypePtr();
8327         else if (VAT)
8328           ElementType = VAT->getElementType().getTypePtr();
8329         else
8330           assert(&Component == &*Components.begin() &&
8331                  "Only expect pointer (non CAT or VAT) when this is the "
8332                  "first Component");
8333         // If ElementType is null, then it means the base is a pointer
8334         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8335         // for next iteration.
8336         if (ElementType) {
8337           // For the case that having pointer as base, we need to remove one
8338           // level of indirection.
8339           if (&Component != &*Components.begin())
8340             ElementType = ElementType->getPointeeOrArrayElementType();
8341           ElementTypeSize =
8342               Context.getTypeSizeInChars(ElementType).getQuantity();
8343           CurStrides.push_back(
8344               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8345         }
8346       }
8347       // Get dimension value except for the last dimension since we don't need
8348       // it.
8349       if (DimSizes.size() < Components.size() - 1) {
8350         if (CAT)
8351           DimSizes.push_back(llvm::ConstantInt::get(
8352               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8353         else if (VAT)
8354           DimSizes.push_back(CGF.Builder.CreateIntCast(
8355               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8356               /*IsSigned=*/false));
8357       }
8358     }
8359 
8360     // Skip the dummy dimension since we have already have its information.
8361     auto *DI = DimSizes.begin() + 1;
8362     // Product of dimension.
8363     llvm::Value *DimProd =
8364         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8365 
8366     // Collect info for non-contiguous. Notice that offset, count, and stride
8367     // are only meaningful for array-section, so we insert a null for anything
8368     // other than array-section.
8369     // Also, the size of offset, count, and stride are not the same as
8370     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8371     // count, and stride are the same as the number of non-contiguous
8372     // declaration in target update to/from clause.
8373     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8374          Components) {
8375       const Expr *AssocExpr = Component.getAssociatedExpression();
8376 
8377       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8378         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8379             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8380             /*isSigned=*/false);
8381         CurOffsets.push_back(Offset);
8382         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8383         CurStrides.push_back(CurStrides.back());
8384         continue;
8385       }
8386 
8387       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8388 
8389       if (!OASE)
8390         continue;
8391 
8392       // Offset
8393       const Expr *OffsetExpr = OASE->getLowerBound();
8394       llvm::Value *Offset = nullptr;
8395       if (!OffsetExpr) {
8396         // If offset is absent, then we just set it to zero.
8397         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8398       } else {
8399         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8400                                            CGF.Int64Ty,
8401                                            /*isSigned=*/false);
8402       }
8403       CurOffsets.push_back(Offset);
8404 
8405       // Count
8406       const Expr *CountExpr = OASE->getLength();
8407       llvm::Value *Count = nullptr;
8408       if (!CountExpr) {
8409         // In Clang, once a high dimension is an array section, we construct all
8410         // the lower dimension as array section, however, for case like
8411         // arr[0:2][2], Clang construct the inner dimension as an array section
8412         // but it actually is not in an array section form according to spec.
8413         if (!OASE->getColonLocFirst().isValid() &&
8414             !OASE->getColonLocSecond().isValid()) {
8415           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8416         } else {
8417           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8418           // When the length is absent it defaults to ⌈(size −
8419           // lower-bound)/stride⌉, where size is the size of the array
8420           // dimension.
8421           const Expr *StrideExpr = OASE->getStride();
8422           llvm::Value *Stride =
8423               StrideExpr
8424                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8425                                               CGF.Int64Ty, /*isSigned=*/false)
8426                   : nullptr;
8427           if (Stride)
8428             Count = CGF.Builder.CreateUDiv(
8429                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8430           else
8431             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8432         }
8433       } else {
8434         Count = CGF.EmitScalarExpr(CountExpr);
8435       }
8436       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8437       CurCounts.push_back(Count);
8438 
8439       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8440       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8441       //              Offset      Count     Stride
8442       //    D0          0           1         4    (int)    <- dummy dimension
8443       //    D1          0           2         8    (2 * (1) * 4)
8444       //    D2          1           2         20   (1 * (1 * 5) * 4)
8445       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8446       const Expr *StrideExpr = OASE->getStride();
8447       llvm::Value *Stride =
8448           StrideExpr
8449               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8450                                           CGF.Int64Ty, /*isSigned=*/false)
8451               : nullptr;
8452       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8453       if (Stride)
8454         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8455       else
8456         CurStrides.push_back(DimProd);
8457       if (DI != DimSizes.end())
8458         ++DI;
8459     }
8460 
8461     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8462     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8463     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8464   }
8465 
8466   /// Return the adjusted map modifiers if the declaration a capture refers to
8467   /// appears in a first-private clause. This is expected to be used only with
8468   /// directives that start with 'target'.
8469   MappableExprsHandler::OpenMPOffloadMappingFlags
8470   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8471     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8472 
8473     // A first private variable captured by reference will use only the
8474     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8475     // declaration is known as first-private in this handler.
8476     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8477       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8478         return MappableExprsHandler::OMP_MAP_TO |
8479                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8480       return MappableExprsHandler::OMP_MAP_PRIVATE |
8481              MappableExprsHandler::OMP_MAP_TO;
8482     }
8483     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8484     if (I != LambdasMap.end())
8485       // for map(to: lambda): using user specified map type.
8486       return getMapTypeBits(
8487           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8488           /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8489           /*AddPtrFlag=*/false,
8490           /*AddIsTargetParamFlag=*/false,
8491           /*isNonContiguous=*/false);
8492     return MappableExprsHandler::OMP_MAP_TO |
8493            MappableExprsHandler::OMP_MAP_FROM;
8494   }
8495 
8496   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8497     // Rotate by getFlagMemberOffset() bits.
8498     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8499                                                   << getFlagMemberOffset());
8500   }
8501 
8502   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8503                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8504     // If the entry is PTR_AND_OBJ but has not been marked with the special
8505     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8506     // marked as MEMBER_OF.
8507     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8508         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8509       return;
8510 
8511     // Reset the placeholder value to prepare the flag for the assignment of the
8512     // proper MEMBER_OF value.
8513     Flags &= ~OMP_MAP_MEMBER_OF;
8514     Flags |= MemberOfFlag;
8515   }
8516 
  /// Append to \p Layout the fields of \p RD — including those of its
  /// non-virtual and virtual bases, recursively — in the order given by the
  /// CodeGen record layout. \p AsBase selects the base-subobject LLVM type
  /// (used when \p RD is being laid out as a base class) instead of the
  /// complete-object type. Bit-fields and zero-size fields are skipped.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    // One slot per LLVM struct element; each slot ends up holding either a
    // base class or a field, keyed by its LLVM field index.
    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // A slot already claimed (e.g. by a non-virtual base) takes priority.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Walk the slots in layout order: recurse into bases (as base
    // subobjects), push fields directly.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
8576 
8577   /// Generate all the base pointers, section pointers, sizes, map types, and
8578   /// mappers for the extracted mappable expressions (all included in \a
8579   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8580   /// pair of the relevant declaration and index where it occurs is appended to
8581   /// the device pointers info array.
8582   void generateAllInfoForClauses(
8583       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8584       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8585           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8586     // We have to process the component lists that relate with the same
8587     // declaration in a single chunk so that we can generate the map flags
8588     // correctly. Therefore, we organize all lists in a map.
8589     enum MapKind { Present, Allocs, Other, Total };
8590     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8591                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8592         Info;
8593 
8594     // Helper function to fill the information map for the different supported
8595     // clauses.
8596     auto &&InfoGen =
8597         [&Info, &SkipVarSet](
8598             const ValueDecl *D, MapKind Kind,
8599             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8600             OpenMPMapClauseKind MapType,
8601             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8602             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8603             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8604             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8605           if (SkipVarSet.contains(D))
8606             return;
8607           auto It = Info.find(D);
8608           if (It == Info.end())
8609             It = Info
8610                      .insert(std::make_pair(
8611                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8612                      .first;
8613           It->second[Kind].emplace_back(
8614               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8615               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8616         };
8617 
8618     for (const auto *Cl : Clauses) {
8619       const auto *C = dyn_cast<OMPMapClause>(Cl);
8620       if (!C)
8621         continue;
8622       MapKind Kind = Other;
8623       if (llvm::is_contained(C->getMapTypeModifiers(),
8624                              OMPC_MAP_MODIFIER_present))
8625         Kind = Present;
8626       else if (C->getMapType() == OMPC_MAP_alloc)
8627         Kind = Allocs;
8628       const auto *EI = C->getVarRefs().begin();
8629       for (const auto L : C->component_lists()) {
8630         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8631         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8632                 C->getMapTypeModifiers(), llvm::None,
8633                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8634                 E);
8635         ++EI;
8636       }
8637     }
8638     for (const auto *Cl : Clauses) {
8639       const auto *C = dyn_cast<OMPToClause>(Cl);
8640       if (!C)
8641         continue;
8642       MapKind Kind = Other;
8643       if (llvm::is_contained(C->getMotionModifiers(),
8644                              OMPC_MOTION_MODIFIER_present))
8645         Kind = Present;
8646       const auto *EI = C->getVarRefs().begin();
8647       for (const auto L : C->component_lists()) {
8648         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8649                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8650                 C->isImplicit(), std::get<2>(L), *EI);
8651         ++EI;
8652       }
8653     }
8654     for (const auto *Cl : Clauses) {
8655       const auto *C = dyn_cast<OMPFromClause>(Cl);
8656       if (!C)
8657         continue;
8658       MapKind Kind = Other;
8659       if (llvm::is_contained(C->getMotionModifiers(),
8660                              OMPC_MOTION_MODIFIER_present))
8661         Kind = Present;
8662       const auto *EI = C->getVarRefs().begin();
8663       for (const auto L : C->component_lists()) {
8664         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8665                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8666                 C->isImplicit(), std::get<2>(L), *EI);
8667         ++EI;
8668       }
8669     }
8670 
8671     // Look at the use_device_ptr clause information and mark the existing map
8672     // entries as such. If there is no map information for an entry in the
8673     // use_device_ptr list, we create one with map type 'alloc' and zero size
8674     // section. It is the user fault if that was not mapped before. If there is
8675     // no map information and the pointer is a struct member, then we defer the
8676     // emission of that entry until the whole struct has been processed.
8677     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8678                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8679         DeferredInfo;
8680     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8681 
8682     for (const auto *Cl : Clauses) {
8683       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8684       if (!C)
8685         continue;
8686       for (const auto L : C->component_lists()) {
8687         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8688             std::get<1>(L);
8689         assert(!Components.empty() &&
8690                "Not expecting empty list of components!");
8691         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8692         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8693         const Expr *IE = Components.back().getAssociatedExpression();
8694         // If the first component is a member expression, we have to look into
8695         // 'this', which maps to null in the map of map information. Otherwise
8696         // look directly for the information.
8697         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8698 
8699         // We potentially have map information for this declaration already.
8700         // Look for the first set of components that refer to it.
8701         if (It != Info.end()) {
8702           bool Found = false;
8703           for (auto &Data : It->second) {
8704             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8705               return MI.Components.back().getAssociatedDeclaration() == VD;
8706             });
8707             // If we found a map entry, signal that the pointer has to be
8708             // returned and move on to the next declaration. Exclude cases where
8709             // the base pointer is mapped as array subscript, array section or
8710             // array shaping. The base address is passed as a pointer to base in
8711             // this case and cannot be used as a base for use_device_ptr list
8712             // item.
8713             if (CI != Data.end()) {
8714               auto PrevCI = std::next(CI->Components.rbegin());
8715               const auto *VarD = dyn_cast<VarDecl>(VD);
8716               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8717                   isa<MemberExpr>(IE) ||
8718                   !VD->getType().getNonReferenceType()->isPointerType() ||
8719                   PrevCI == CI->Components.rend() ||
8720                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8721                   VarD->hasLocalStorage()) {
8722                 CI->ReturnDevicePointer = true;
8723                 Found = true;
8724                 break;
8725               }
8726             }
8727           }
8728           if (Found)
8729             continue;
8730         }
8731 
8732         // We didn't find any match in our map information - generate a zero
8733         // size array section - if the pointer is a struct member we defer this
8734         // action until the whole struct has been processed.
8735         if (isa<MemberExpr>(IE)) {
8736           // Insert the pointer into Info to be processed by
8737           // generateInfoForComponentList. Because it is a member pointer
8738           // without a pointee, no entry will be generated for it, therefore
8739           // we need to generate one after the whole struct has been processed.
8740           // Nonetheless, generateInfoForComponentList must be called to take
8741           // the pointer into account for the calculation of the range of the
8742           // partial struct.
8743           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8744                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8745                   nullptr);
8746           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8747         } else {
8748           llvm::Value *Ptr =
8749               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8750           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8751           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8752           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8753           UseDevicePtrCombinedInfo.Sizes.push_back(
8754               llvm::Constant::getNullValue(CGF.Int64Ty));
8755           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8756           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8757         }
8758       }
8759     }
8760 
8761     // Look at the use_device_addr clause information and mark the existing map
8762     // entries as such. If there is no map information for an entry in the
8763     // use_device_addr list, we create one with map type 'alloc' and zero size
8764     // section. It is the user fault if that was not mapped before. If there is
8765     // no map information and the pointer is a struct member, then we defer the
8766     // emission of that entry until the whole struct has been processed.
8767     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8768     for (const auto *Cl : Clauses) {
8769       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8770       if (!C)
8771         continue;
8772       for (const auto L : C->component_lists()) {
8773         assert(!std::get<1>(L).empty() &&
8774                "Not expecting empty list of components!");
8775         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8776         if (!Processed.insert(VD).second)
8777           continue;
8778         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8779         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8780         // If the first component is a member expression, we have to look into
8781         // 'this', which maps to null in the map of map information. Otherwise
8782         // look directly for the information.
8783         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8784 
8785         // We potentially have map information for this declaration already.
8786         // Look for the first set of components that refer to it.
8787         if (It != Info.end()) {
8788           bool Found = false;
8789           for (auto &Data : It->second) {
8790             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8791               return MI.Components.back().getAssociatedDeclaration() == VD;
8792             });
8793             // If we found a map entry, signal that the pointer has to be
8794             // returned and move on to the next declaration.
8795             if (CI != Data.end()) {
8796               CI->ReturnDevicePointer = true;
8797               Found = true;
8798               break;
8799             }
8800           }
8801           if (Found)
8802             continue;
8803         }
8804 
8805         // We didn't find any match in our map information - generate a zero
8806         // size array section - if the pointer is a struct member we defer this
8807         // action until the whole struct has been processed.
8808         if (isa<MemberExpr>(IE)) {
8809           // Insert the pointer into Info to be processed by
8810           // generateInfoForComponentList. Because it is a member pointer
8811           // without a pointee, no entry will be generated for it, therefore
8812           // we need to generate one after the whole struct has been processed.
8813           // Nonetheless, generateInfoForComponentList must be called to take
8814           // the pointer into account for the calculation of the range of the
8815           // partial struct.
8816           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8817                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8818                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8819           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8820         } else {
8821           llvm::Value *Ptr;
8822           if (IE->isGLValue())
8823             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8824           else
8825             Ptr = CGF.EmitScalarExpr(IE);
8826           CombinedInfo.Exprs.push_back(VD);
8827           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8828           CombinedInfo.Pointers.push_back(Ptr);
8829           CombinedInfo.Sizes.push_back(
8830               llvm::Constant::getNullValue(CGF.Int64Ty));
8831           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8832           CombinedInfo.Mappers.push_back(nullptr);
8833         }
8834       }
8835     }
8836 
8837     for (const auto &Data : Info) {
8838       StructRangeInfoTy PartialStruct;
8839       // Temporary generated information.
8840       MapCombinedInfoTy CurInfo;
8841       const Decl *D = Data.first;
8842       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8843       for (const auto &M : Data.second) {
8844         for (const MapInfo &L : M) {
8845           assert(!L.Components.empty() &&
8846                  "Not expecting declaration with no component lists.");
8847 
8848           // Remember the current base pointer index.
8849           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8850           CurInfo.NonContigInfo.IsNonContiguous =
8851               L.Components.back().isNonContiguous();
8852           generateInfoForComponentList(
8853               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8854               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8855               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8856 
8857           // If this entry relates with a device pointer, set the relevant
8858           // declaration and add the 'return pointer' flag.
8859           if (L.ReturnDevicePointer) {
8860             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8861                    "Unexpected number of mapped base pointers.");
8862 
8863             const ValueDecl *RelevantVD =
8864                 L.Components.back().getAssociatedDeclaration();
8865             assert(RelevantVD &&
8866                    "No relevant declaration related with device pointer??");
8867 
8868             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8869                 RelevantVD);
8870             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8871           }
8872         }
8873       }
8874 
8875       // Append any pending zero-length pointers which are struct members and
8876       // used with use_device_ptr or use_device_addr.
8877       auto CI = DeferredInfo.find(Data.first);
8878       if (CI != DeferredInfo.end()) {
8879         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8880           llvm::Value *BasePtr;
8881           llvm::Value *Ptr;
8882           if (L.ForDeviceAddr) {
8883             if (L.IE->isGLValue())
8884               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8885             else
8886               Ptr = this->CGF.EmitScalarExpr(L.IE);
8887             BasePtr = Ptr;
8888             // Entry is RETURN_PARAM. Also, set the placeholder value
8889             // MEMBER_OF=FFFF so that the entry is later updated with the
8890             // correct value of MEMBER_OF.
8891             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8892           } else {
8893             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8894             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8895                                              L.IE->getExprLoc());
8896             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8897             // placeholder value MEMBER_OF=FFFF so that the entry is later
8898             // updated with the correct value of MEMBER_OF.
8899             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8900                                     OMP_MAP_MEMBER_OF);
8901           }
8902           CurInfo.Exprs.push_back(L.VD);
8903           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8904           CurInfo.Pointers.push_back(Ptr);
8905           CurInfo.Sizes.push_back(
8906               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8907           CurInfo.Mappers.push_back(nullptr);
8908         }
8909       }
8910       // If there is an entry in PartialStruct it means we have a struct with
8911       // individual members mapped. Emit an extra combined entry.
8912       if (PartialStruct.Base.isValid()) {
8913         CurInfo.NonContigInfo.Dims.push_back(0);
8914         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8915       }
8916 
8917       // We need to append the results of this capture to what we already
8918       // have.
8919       CombinedInfo.append(CurInfo);
8920     }
8921     // Append data for use_device_ptr clauses.
8922     CombinedInfo.append(UseDevicePtrCombinedInfo);
8923   }
8924 
8925 public:
8926   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8927       : CurDir(&Dir), CGF(CGF) {
8928     // Extract firstprivate clause information.
8929     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8930       for (const auto *D : C->varlists())
8931         FirstPrivateDecls.try_emplace(
8932             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8933     // Extract implicit firstprivates from uses_allocators clauses.
8934     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8935       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8936         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8937         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8938           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8939                                         /*Implicit=*/true);
8940         else if (const auto *VD = dyn_cast<VarDecl>(
8941                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8942                          ->getDecl()))
8943           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8944       }
8945     }
8946     // Extract device pointer clause information.
8947     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8948       for (auto L : C->component_lists())
8949         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8950     // Extract map information.
8951     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8952       if (C->getMapType() != OMPC_MAP_to)
8953         continue;
8954       for (auto L : C->component_lists()) {
8955         const ValueDecl *VD = std::get<0>(L);
8956         const auto *RD = VD ? VD->getType()
8957                                   .getCanonicalType()
8958                                   .getNonReferenceType()
8959                                   ->getAsCXXRecordDecl()
8960                             : nullptr;
8961         if (RD && RD->isLambda())
8962           LambdasMap.try_emplace(std::get<0>(L), C);
8963       }
8964     }
8965   }
8966 
  /// Constructor for the declare mapper directive. Only records the directive
  /// and the CodeGenFunction; unlike the executable-directive constructor, no
  /// clause pre-processing (firstprivate/device-pointer/lambda maps) is done.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8970 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// \param CombinedInfo receives the extra combined entry (base pointer,
  ///        pointer, size, type, mapper) for the enclosing struct.
  /// \param CurTypes map-type flags of the individual member entries; they are
  ///        rewritten in place (MEMBER_OF placeholder, TARGET_PARAM removal).
  /// \param PartialStruct range information collected while mapping members.
  /// \param VD declaration associated with the struct, if any.
  /// \param NotTargetParams if true the combined entry is not a kernel
  ///        argument, so it gets OMP_MAP_NONE instead of TARGET_PARAM.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is neither a member of a bigger struct nor an array
    // section needs no combined parent entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // If the whole record is mapped, both bounds are the record base; the
    // "+1 element" GEP below then yields the size of the complete record.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    // Byte difference computed over i8* so the result is in bytes.
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element: the combined entry
    // (pushed above) is the one passed to the kernel, not the member entry.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement.  Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_OMPX_HOLD;
        })) {
      CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
9042 
9043   /// Generate all the base pointers, section pointers, sizes, map types, and
9044   /// mappers for the extracted mappable expressions (all included in \a
9045   /// CombinedInfo). Also, for each item that relates with a device pointer, a
9046   /// pair of the relevant declaration and index where it occurs is appended to
9047   /// the device pointers info array.
9048   void generateAllInfo(
9049       MapCombinedInfoTy &CombinedInfo,
9050       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9051           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9052     assert(CurDir.is<const OMPExecutableDirective *>() &&
9053            "Expect a executable directive");
9054     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9055     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9056   }
9057 
9058   /// Generate all the base pointers, section pointers, sizes, map types, and
9059   /// mappers for the extracted map clauses of user-defined mapper (all included
9060   /// in \a CombinedInfo).
9061   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9062     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9063            "Expect a declare mapper directive");
9064     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9065     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9066   }
9067 
9068   /// Emit capture info for lambdas for variables captured by reference.
9069   void generateInfoForLambdaCaptures(
9070       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9071       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9072     const auto *RD = VD->getType()
9073                          .getCanonicalType()
9074                          .getNonReferenceType()
9075                          ->getAsCXXRecordDecl();
9076     if (!RD || !RD->isLambda())
9077       return;
9078     Address VDAddr =
9079         Address::deprecated(Arg, CGF.getContext().getDeclAlign(VD));
9080     LValue VDLVal = CGF.MakeAddrLValue(
9081         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
9082     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
9083     FieldDecl *ThisCapture = nullptr;
9084     RD->getCaptureFields(Captures, ThisCapture);
9085     if (ThisCapture) {
9086       LValue ThisLVal =
9087           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9088       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9089       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9090                                  VDLVal.getPointer(CGF));
9091       CombinedInfo.Exprs.push_back(VD);
9092       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9093       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9094       CombinedInfo.Sizes.push_back(
9095           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9096                                     CGF.Int64Ty, /*isSigned=*/true));
9097       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9098                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9099       CombinedInfo.Mappers.push_back(nullptr);
9100     }
9101     for (const LambdaCapture &LC : RD->captures()) {
9102       if (!LC.capturesVariable())
9103         continue;
9104       const VarDecl *VD = LC.getCapturedVar();
9105       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9106         continue;
9107       auto It = Captures.find(VD);
9108       assert(It != Captures.end() && "Found lambda capture without field.");
9109       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9110       if (LC.getCaptureKind() == LCK_ByRef) {
9111         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9112         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9113                                    VDLVal.getPointer(CGF));
9114         CombinedInfo.Exprs.push_back(VD);
9115         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9116         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9117         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9118             CGF.getTypeSize(
9119                 VD->getType().getCanonicalType().getNonReferenceType()),
9120             CGF.Int64Ty, /*isSigned=*/true));
9121       } else {
9122         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9123         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9124                                    VDLVal.getPointer(CGF));
9125         CombinedInfo.Exprs.push_back(VD);
9126         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9127         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9128         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9129       }
9130       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9131                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9132       CombinedInfo.Mappers.push_back(nullptr);
9133     }
9134   }
9135 
9136   /// Set correct indices for lambdas captures.
9137   void adjustMemberOfForLambdaCaptures(
9138       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9139       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9140       MapFlagsArrayTy &Types) const {
9141     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9142       // Set correct member_of idx for all implicit lambda captures.
9143       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9144                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9145         continue;
9146       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9147       assert(BasePtr && "Unable to find base lambda address.");
9148       int TgtIdx = -1;
9149       for (unsigned J = I; J > 0; --J) {
9150         unsigned Idx = J - 1;
9151         if (Pointers[Idx] != BasePtr)
9152           continue;
9153         TgtIdx = Idx;
9154         break;
9155       }
9156       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9157       // All other current entries will be MEMBER_OF the combined entry
9158       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9159       // 0xFFFF in the MEMBER_OF field).
9160       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9161       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9162     }
9163   }
9164 
9165   /// Generate the base pointers, section pointers, sizes, map types, and
9166   /// mappers associated to a given capture (all included in \a CombinedInfo).
9167   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9168                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9169                               StructRangeInfoTy &PartialStruct) const {
9170     assert(!Cap->capturesVariableArrayType() &&
9171            "Not expecting to generate map info for a variable array type!");
9172 
9173     // We need to know when we generating information for the first component
9174     const ValueDecl *VD = Cap->capturesThis()
9175                               ? nullptr
9176                               : Cap->getCapturedVar()->getCanonicalDecl();
9177 
9178     // for map(to: lambda): skip here, processing it in
9179     // generateDefaultMapInfo
9180     if (LambdasMap.count(VD))
9181       return;
9182 
9183     // If this declaration appears in a is_device_ptr clause we just have to
9184     // pass the pointer by value. If it is a reference to a declaration, we just
9185     // pass its value.
9186     if (DevPointersMap.count(VD)) {
9187       CombinedInfo.Exprs.push_back(VD);
9188       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9189       CombinedInfo.Pointers.push_back(Arg);
9190       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9191           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9192           /*isSigned=*/true));
9193       CombinedInfo.Types.push_back(
9194           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9195           OMP_MAP_TARGET_PARAM);
9196       CombinedInfo.Mappers.push_back(nullptr);
9197       return;
9198     }
9199 
9200     using MapData =
9201         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9202                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9203                    const ValueDecl *, const Expr *>;
9204     SmallVector<MapData, 4> DeclComponentLists;
9205     assert(CurDir.is<const OMPExecutableDirective *>() &&
9206            "Expect a executable directive");
9207     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9208     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9209       const auto *EI = C->getVarRefs().begin();
9210       for (const auto L : C->decl_component_lists(VD)) {
9211         const ValueDecl *VDecl, *Mapper;
9212         // The Expression is not correct if the mapping is implicit
9213         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9214         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9215         std::tie(VDecl, Components, Mapper) = L;
9216         assert(VDecl == VD && "We got information for the wrong declaration??");
9217         assert(!Components.empty() &&
9218                "Not expecting declaration with no component lists.");
9219         DeclComponentLists.emplace_back(Components, C->getMapType(),
9220                                         C->getMapTypeModifiers(),
9221                                         C->isImplicit(), Mapper, E);
9222         ++EI;
9223       }
9224     }
9225     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9226                                              const MapData &RHS) {
9227       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9228       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9229       bool HasPresent =
9230           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9231       bool HasAllocs = MapType == OMPC_MAP_alloc;
9232       MapModifiers = std::get<2>(RHS);
9233       MapType = std::get<1>(LHS);
9234       bool HasPresentR =
9235           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9236       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9237       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9238     });
9239 
9240     // Find overlapping elements (including the offset from the base element).
9241     llvm::SmallDenseMap<
9242         const MapData *,
9243         llvm::SmallVector<
9244             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9245         4>
9246         OverlappedData;
9247     size_t Count = 0;
9248     for (const MapData &L : DeclComponentLists) {
9249       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9250       OpenMPMapClauseKind MapType;
9251       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9252       bool IsImplicit;
9253       const ValueDecl *Mapper;
9254       const Expr *VarRef;
9255       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9256           L;
9257       ++Count;
9258       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9259         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9260         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9261                  VarRef) = L1;
9262         auto CI = Components.rbegin();
9263         auto CE = Components.rend();
9264         auto SI = Components1.rbegin();
9265         auto SE = Components1.rend();
9266         for (; CI != CE && SI != SE; ++CI, ++SI) {
9267           if (CI->getAssociatedExpression()->getStmtClass() !=
9268               SI->getAssociatedExpression()->getStmtClass())
9269             break;
9270           // Are we dealing with different variables/fields?
9271           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9272             break;
9273         }
9274         // Found overlapping if, at least for one component, reached the head
9275         // of the components list.
9276         if (CI == CE || SI == SE) {
9277           // Ignore it if it is the same component.
9278           if (CI == CE && SI == SE)
9279             continue;
9280           const auto It = (SI == SE) ? CI : SI;
9281           // If one component is a pointer and another one is a kind of
9282           // dereference of this pointer (array subscript, section, dereference,
9283           // etc.), it is not an overlapping.
9284           // Same, if one component is a base and another component is a
9285           // dereferenced pointer memberexpr with the same base.
9286           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9287               (std::prev(It)->getAssociatedDeclaration() &&
9288                std::prev(It)
9289                    ->getAssociatedDeclaration()
9290                    ->getType()
9291                    ->isPointerType()) ||
9292               (It->getAssociatedDeclaration() &&
9293                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9294                std::next(It) != CE && std::next(It) != SE))
9295             continue;
9296           const MapData &BaseData = CI == CE ? L : L1;
9297           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9298               SI == SE ? Components : Components1;
9299           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9300           OverlappedElements.getSecond().push_back(SubData);
9301         }
9302       }
9303     }
9304     // Sort the overlapped elements for each item.
9305     llvm::SmallVector<const FieldDecl *, 4> Layout;
9306     if (!OverlappedData.empty()) {
9307       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9308       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9309       while (BaseType != OrigType) {
9310         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9311         OrigType = BaseType->getPointeeOrArrayElementType();
9312       }
9313 
9314       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9315         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9316       else {
9317         const auto *RD = BaseType->getAsRecordDecl();
9318         Layout.append(RD->field_begin(), RD->field_end());
9319       }
9320     }
9321     for (auto &Pair : OverlappedData) {
9322       llvm::stable_sort(
9323           Pair.getSecond(),
9324           [&Layout](
9325               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9326               OMPClauseMappableExprCommon::MappableExprComponentListRef
9327                   Second) {
9328             auto CI = First.rbegin();
9329             auto CE = First.rend();
9330             auto SI = Second.rbegin();
9331             auto SE = Second.rend();
9332             for (; CI != CE && SI != SE; ++CI, ++SI) {
9333               if (CI->getAssociatedExpression()->getStmtClass() !=
9334                   SI->getAssociatedExpression()->getStmtClass())
9335                 break;
9336               // Are we dealing with different variables/fields?
9337               if (CI->getAssociatedDeclaration() !=
9338                   SI->getAssociatedDeclaration())
9339                 break;
9340             }
9341 
9342             // Lists contain the same elements.
9343             if (CI == CE && SI == SE)
9344               return false;
9345 
9346             // List with less elements is less than list with more elements.
9347             if (CI == CE || SI == SE)
9348               return CI == CE;
9349 
9350             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9351             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9352             if (FD1->getParent() == FD2->getParent())
9353               return FD1->getFieldIndex() < FD2->getFieldIndex();
9354             const auto *It =
9355                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9356                   return FD == FD1 || FD == FD2;
9357                 });
9358             return *It == FD1;
9359           });
9360     }
9361 
9362     // Associated with a capture, because the mapping flags depend on it.
9363     // Go through all of the elements with the overlapped elements.
9364     bool IsFirstComponentList = true;
9365     for (const auto &Pair : OverlappedData) {
9366       const MapData &L = *Pair.getFirst();
9367       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9368       OpenMPMapClauseKind MapType;
9369       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9370       bool IsImplicit;
9371       const ValueDecl *Mapper;
9372       const Expr *VarRef;
9373       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9374           L;
9375       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9376           OverlappedComponents = Pair.getSecond();
9377       generateInfoForComponentList(
9378           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9379           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9380           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9381       IsFirstComponentList = false;
9382     }
9383     // Go through other elements without overlapped elements.
9384     for (const MapData &L : DeclComponentLists) {
9385       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9386       OpenMPMapClauseKind MapType;
9387       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9388       bool IsImplicit;
9389       const ValueDecl *Mapper;
9390       const Expr *VarRef;
9391       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9392           L;
9393       auto It = OverlappedData.find(&L);
9394       if (It == OverlappedData.end())
9395         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9396                                      Components, CombinedInfo, PartialStruct,
9397                                      IsFirstComponentList, IsImplicit, Mapper,
9398                                      /*ForDeviceAddr=*/false, VD, VarRef);
9399       IsFirstComponentList = false;
9400     }
9401   }
9402 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Appends exactly one entry to each of the parallel arrays in
  /// \p CombinedInfo (Exprs, BasePointers, Pointers, Sizes, Types, Mappers),
  /// keeping them index-aligned with entries produced for other captures.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    // Assume the map is implicit unless FirstPrivateDecls says otherwise.
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // Capture of 'this': map the pointed-to object with size of the pointee
      // and a 'tofrom' map type.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // If the variable was recorded as firstprivate, the stored flag decides
      // whether the map is considered implicit.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      // Capture by reference: the size is that of the referenced element.
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer captured by reference: pass the pointer value
        // itself, loaded through the reference.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9475 };
9476 } // anonymous namespace
9477 
/// Emit the descriptor structures used by the runtime for non-contiguous
/// target data transfers: for every mapped entry whose dimension count is
/// greater than one, build a stack array of {offset, count, stride} triples
/// and store its address into the corresponding slot of the pointers array.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  // Three uint64_t fields: offset, count, stride (in that order).
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field indices into descriptor_dim, matching the declaration order above.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // Write the triples in reverse of the stored dimension order.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr = Address::deprecated(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    // L advances only for entries that actually emitted a descriptor.
    ++L;
  }
}
9545 
9546 // Try to extract the base declaration from a `this->x` expression if possible.
9547 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9548   if (!E)
9549     return nullptr;
9550 
9551   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9552     if (const MemberExpr *ME =
9553             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9554       return ME->getMemberDecl();
9555   return nullptr;
9556 }
9557 
9558 /// Emit a string constant containing the names of the values mapped to the
9559 /// offloading runtime library.
9560 llvm::Constant *
9561 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9562                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9563 
9564   uint32_t SrcLocStrSize;
9565   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9566     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9567 
9568   SourceLocation Loc;
9569   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9570     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9571       Loc = VD->getLocation();
9572     else
9573       Loc = MapExprs.getMapExpr()->getExprLoc();
9574   } else {
9575     Loc = MapExprs.getMapDecl()->getLocation();
9576   }
9577 
9578   std::string ExprName;
9579   if (MapExprs.getMapExpr()) {
9580     PrintingPolicy P(CGF.getContext().getLangOpts());
9581     llvm::raw_string_ostream OS(ExprName);
9582     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9583     OS.flush();
9584   } else {
9585     ExprName = MapExprs.getMapDecl()->getNameAsString();
9586   }
9587 
9588   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9589   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9590                                          PLoc.getLine(), PLoc.getColumn(),
9591                                          SrcLocStrSize);
9592 }
9593 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Fills Info with the base-pointer, pointer, size, map-type, map-name, and
/// mapper arrays built from \p CombinedInfo. Sizes known at compile time are
/// emitted as a private constant global; runtime sizes are stored into a
/// stack array. When \p IsNonContiguous is set, the non-contiguous
/// descriptors are emitted at the end.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    SmallVector<llvm::Constant *> ConstSizes(
        CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
    llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
    // Partition the sizes: plain constants go into ConstSizes; constant
    // expressions, globals, and non-constant values are flagged in
    // RuntimeSizes and stored at run time in the per-pointer loop below.
    for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
      if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
        if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
          // For a non-contiguous entry the "size" slot carries the number of
          // dimensions rather than a byte count.
          if (IsNonContiguous && (CombinedInfo.Types[I] &
                                  MappableExprsHandler::OMP_MAP_NON_CONTIG))
            ConstSizes[I] = llvm::ConstantInt::get(
                CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
          else
            ConstSizes[I] = CI;
          continue;
        }
      }
      RuntimeSizes.set(I);
    }

    if (RuntimeSizes.all()) {
      // Every size is dynamic: a stack array is enough; it is filled in the
      // per-pointer loop below.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // Emit the known-constant sizes as a private unnamed_addr global.
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      if (RuntimeSizes.any()) {
        // Mixed constant/runtime sizes: copy the global into a stack buffer
        // so the runtime entries can be overwritten below.
        QualType SizeArrayType = Ctx.getConstantArrayType(
            Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
            /*IndexTypeQuals=*/0);
        Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
        llvm::Value *GblConstPtr =
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                SizesArrayGbl, CGM.Int64Ty->getPointerTo());
        CGF.Builder.CreateMemCpy(
            Buffer,
            Address(GblConstPtr, CGM.Int64Ty,
                    CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
                        /*DestWidth=*/64, /*Signed=*/false))),
            CGF.getTypeSize(SizeArrayType));
        Info.SizesArray = Buffer.getPointer();
      } else {
        // All sizes are constant: use the global directly.
        Info.SizesArray = SizesArrayGbl;
      }
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // Build one source-location string per mapped expression.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the base-pointer, pointer, runtime-size, and mapper arrays, one
    // entry per captured pointer.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr =
          Address::deprecated(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // If requested, remember where the base pointer for this device-pointer
      // declaration was stored so it can be looked up later.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr =
          Address::deprecated(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Store the size only for entries that were flagged as runtime-valued;
      // constant entries already live in the sizes global/buffer.
      if (RuntimeSizes.test(I)) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr =
            Address::deprecated(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // Non-contiguous descriptors are only emitted when requested and when there
  // is actually offset information and at least one pointer.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9786 
namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// When true, the map-types argument is taken from Info.MapTypesArrayEnd
  /// (the variant with the 'present' bits cleared), if one was generated.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace
9795 
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers.  If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
///
/// All six output parameters are always assigned: either GEPs decaying the
/// arrays in \p Info to element pointers, or null pointers when there are no
/// mapped pointers (or the corresponding array is not needed).
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each array to a pointer to its first element.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // For the end call, prefer the map-types array with the 'present' bits
    // cleared, when one was generated.
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the mapper information arrays if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // Nothing mapped: all arguments are null pointers of the expected types.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}
9856 
/// Check for inner distribute directive.
///
/// For a 'target' or 'target teams' directive \a D, returns the nested
/// distribute directive when the region's single child (for plain 'target',
/// possibly looking through one intervening 'teams' level) is a distribute
/// directive; returns nullptr otherwise.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    // DKind is the *nested* child's kind; the switch dispatches on the
    // enclosing directive that was passed in.
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      // 'target' may wrap a 'teams' region whose single child is the
      // distribute directive - look one level deeper.
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These target forms never contain a nested distribute directive.
      return nullptr;
    // None of the directives below is a valid *enclosing* directive for this
    // query; reaching them indicates a caller bug.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9966 
9967 /// Emit the user-defined mapper function. The code generation follows the
9968 /// pattern in the example below.
9969 /// \code
9970 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9971 ///                                           void *base, void *begin,
9972 ///                                           int64_t size, int64_t type,
9973 ///                                           void *name = nullptr) {
9974 ///   // Allocate space for an array section first or add a base/begin for
9975 ///   // pointer dereference.
9976 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9977 ///       !maptype.IsDelete)
9978 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9979 ///                                 size*sizeof(Ty), clearToFromMember(type));
9980 ///   // Map members.
9981 ///   for (unsigned i = 0; i < size; i++) {
9982 ///     // For each component specified by this mapper:
9983 ///     for (auto c : begin[i]->all_components) {
9984 ///       if (c.hasMapper())
9985 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9986 ///                       c.arg_type, c.arg_name);
9987 ///       else
9988 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9989 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9990 ///                                     c.arg_name);
9991 ///     }
9992 ///   }
9993 ///   // Delete the array section.
9994 ///   if (size > 1 && maptype.IsDelete)
9995 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9996 ///                                 size*sizeof(Ty), clearToFromMember(type));
9997 /// }
9998 /// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // The mapper function for this declaration was already emitted; nothing to
  // do.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable named in the 'declare mapper' construct. It is privatized
  // below so that, inside the loop, it refers to the current array element.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature matches
  // the \code example in the comment above: (handle, base, begin, size, type,
  // name).
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name embeds the mangled mapped type and the mapper's own
  // name so that distinct mappers get distinct symbols.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  // The incoming Size argument is in bytes; it is converted to an element
  // count below.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  // PtrEnd is the past-the-end pointer of the mapped array section.
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block that ultimately branches back to BodyBB; it is
  // updated below because the per-element codegen creates more blocks.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address::deprecated(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                                      .getAlignment()
                                      .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the pre-existing component count into the MEMBER_OF bit-field
  // position of the map-type flags.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Only emit mapping-name information when debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    // The tofrom case reaches EndBB directly from ToElseBB with the combined
    // map type unchanged.
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the generated function and, when emitted on behalf of a function
  // body, record the mapper against that function for later bookkeeping.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
10247 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // size > 1 means a section of more than one element is being mapped.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    // Initialize when it is an array section, or a pointer dereference needs
    // an extra base/begin component: size > 1 || (base != begin && PtrAndObj).
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    // For init, the push only happens when the DELETE bit is NOT set.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    // For deletion, the push only happens when the DELETE bit IS set.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
10315 
10316 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10317     const OMPDeclareMapperDecl *D) {
10318   auto I = UDMMap.find(D);
10319   if (I != UDMMap.end())
10320     return I->second;
10321   emitUserDefinedMapper(D);
10322   return UDMMap.lookup(D);
10323 }
10324 
10325 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10326     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10327     llvm::Value *DeviceID,
10328     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10329                                      const OMPLoopDirective &D)>
10330         SizeEmitter) {
10331   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10332   const OMPExecutableDirective *TD = &D;
10333   // Get nested teams distribute kind directive, if any.
10334   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10335     TD = getNestedDistributeDirective(CGM.getContext(), D);
10336   if (!TD)
10337     return;
10338   const auto *LD = cast<OMPLoopDirective>(TD);
10339   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10340                                                          PrePostActionTy &) {
10341     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10342       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10343       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10344       CGF.EmitRuntimeCall(
10345           OMPBuilder.getOrCreateRuntimeFunction(
10346               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10347           Args);
10348     }
10349   };
10350   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10351 }
10352 
10353 void CGOpenMPRuntime::emitTargetCall(
10354     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10355     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10356     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10357     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10358                                      const OMPLoopDirective &D)>
10359         SizeEmitter) {
10360   if (!CGF.HaveInsertPoint())
10361     return;
10362 
10363   const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
10364                                    CGM.getLangOpts().OpenMPOffloadMandatory;
10365 
10366   assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
10367 
10368   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10369                                  D.hasClausesOfKind<OMPNowaitClause>();
10370   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10371   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10372   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10373                                             PrePostActionTy &) {
10374     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10375   };
10376   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10377 
10378   CodeGenFunction::OMPTargetDataInfo InputInfo;
10379   llvm::Value *MapTypesArray = nullptr;
10380   llvm::Value *MapNamesArray = nullptr;
10381   // Generate code for the host fallback function.
10382   auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
10383                         &CS, OffloadingMandatory](CodeGenFunction &CGF) {
10384     if (OffloadingMandatory) {
10385       CGF.Builder.CreateUnreachable();
10386     } else {
10387       if (RequiresOuterTask) {
10388         CapturedVars.clear();
10389         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10390       }
10391       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10392     }
10393   };
10394   // Fill up the pointer arrays and transfer execution to the device.
10395   auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
10396                     &MapNamesArray, SizeEmitter,
10397                     FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
10398     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10399       // Reverse offloading is not supported, so just execute on the host.
10400       FallbackGen(CGF);
10401       return;
10402     }
10403 
10404     // On top of the arrays that were filled up, the target offloading call
10405     // takes as arguments the device id as well as the host pointer. The host
10406     // pointer is used by the runtime library to identify the current target
10407     // region, so it only has to be unique and not necessarily point to
10408     // anything. It could be the pointer to the outlined function that
10409     // implements the target region, but we aren't using that so that the
10410     // compiler doesn't need to keep that, and could therefore inline the host
10411     // function if proven worthwhile during optimization.
10412 
10413     // From this point on, we need to have an ID of the target region defined.
10414     assert(OutlinedFnID && "Invalid outlined function ID!");
10415     (void)OutlinedFnID;
10416 
10417     // Emit device ID if any.
10418     llvm::Value *DeviceID;
10419     if (Device.getPointer()) {
10420       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10421               Device.getInt() == OMPC_DEVICE_device_num) &&
10422              "Expected device_num modifier.");
10423       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10424       DeviceID =
10425           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10426     } else {
10427       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10428     }
10429 
10430     // Emit the number of elements in the offloading arrays.
10431     llvm::Value *PointerNum =
10432         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10433 
10434     // Return value of the runtime offloading call.
10435     llvm::Value *Return;
10436 
10437     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10438     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10439 
10440     // Source location for the ident struct
10441     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10442 
10443     // Emit tripcount for the target loop-based directive.
10444     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10445 
10446     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10447     // The target region is an outlined function launched by the runtime
10448     // via calls __tgt_target() or __tgt_target_teams().
10449     //
10450     // __tgt_target() launches a target region with one team and one thread,
10451     // executing a serial region.  This master thread may in turn launch
10452     // more threads within its team upon encountering a parallel region,
10453     // however, no additional teams can be launched on the device.
10454     //
10455     // __tgt_target_teams() launches a target region with one or more teams,
10456     // each with one or more threads.  This call is required for target
10457     // constructs such as:
10458     //  'target teams'
10459     //  'target' / 'teams'
10460     //  'target teams distribute parallel for'
10461     //  'target parallel'
10462     // and so on.
10463     //
10464     // Note that on the host and CPU targets, the runtime implementation of
10465     // these calls simply call the outlined function without forking threads.
10466     // The outlined functions themselves have runtime calls to
10467     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10468     // the compiler in emitTeamsCall() and emitParallelCall().
10469     //
10470     // In contrast, on the NVPTX target, the implementation of
10471     // __tgt_target_teams() launches a GPU kernel with the requested number
10472     // of teams and threads so no additional calls to the runtime are required.
10473     if (NumTeams) {
10474       // If we have NumTeams defined this means that we have an enclosed teams
10475       // region. Therefore we also expect to have NumThreads defined. These two
10476       // values should be defined in the presence of a teams directive,
10477       // regardless of having any clauses associated. If the user is using teams
10478       // but no clauses, these two values will be the default that should be
10479       // passed to the runtime library - a 32-bit integer with the value zero.
10480       assert(NumThreads && "Thread limit expression should be available along "
10481                            "with number of teams.");
10482       SmallVector<llvm::Value *> OffloadingArgs = {
10483           RTLoc,
10484           DeviceID,
10485           OutlinedFnID,
10486           PointerNum,
10487           InputInfo.BasePointersArray.getPointer(),
10488           InputInfo.PointersArray.getPointer(),
10489           InputInfo.SizesArray.getPointer(),
10490           MapTypesArray,
10491           MapNamesArray,
10492           InputInfo.MappersArray.getPointer(),
10493           NumTeams,
10494           NumThreads};
10495       if (HasNowait) {
10496         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10497         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10498         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10499         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10500         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10501         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10502       }
10503       Return = CGF.EmitRuntimeCall(
10504           OMPBuilder.getOrCreateRuntimeFunction(
10505               CGM.getModule(), HasNowait
10506                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10507                                    : OMPRTL___tgt_target_teams_mapper),
10508           OffloadingArgs);
10509     } else {
10510       SmallVector<llvm::Value *> OffloadingArgs = {
10511           RTLoc,
10512           DeviceID,
10513           OutlinedFnID,
10514           PointerNum,
10515           InputInfo.BasePointersArray.getPointer(),
10516           InputInfo.PointersArray.getPointer(),
10517           InputInfo.SizesArray.getPointer(),
10518           MapTypesArray,
10519           MapNamesArray,
10520           InputInfo.MappersArray.getPointer()};
10521       if (HasNowait) {
10522         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10523         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10524         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10525         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10526         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10527         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10528       }
10529       Return = CGF.EmitRuntimeCall(
10530           OMPBuilder.getOrCreateRuntimeFunction(
10531               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10532                                          : OMPRTL___tgt_target_mapper),
10533           OffloadingArgs);
10534     }
10535 
10536     // Check the error code and execute the host version if required.
10537     llvm::BasicBlock *OffloadFailedBlock =
10538         CGF.createBasicBlock("omp_offload.failed");
10539     llvm::BasicBlock *OffloadContBlock =
10540         CGF.createBasicBlock("omp_offload.cont");
10541     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10542     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10543 
10544     CGF.EmitBlock(OffloadFailedBlock);
10545     FallbackGen(CGF);
10546 
10547     CGF.EmitBranch(OffloadContBlock);
10548 
10549     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10550   };
10551 
10552   // Notify that the host version must be executed.
10553   auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
10554     FallbackGen(CGF);
10555   };
10556 
10557   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10558                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10559                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10560     // Fill up the arrays with all the captured variables.
10561     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10562 
10563     // Get mappable expression information.
10564     MappableExprsHandler MEHandler(D, CGF);
10565     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10566     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10567 
10568     auto RI = CS.getCapturedRecordDecl()->field_begin();
10569     auto *CV = CapturedVars.begin();
10570     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10571                                               CE = CS.capture_end();
10572          CI != CE; ++CI, ++RI, ++CV) {
10573       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10574       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10575 
10576       // VLA sizes are passed to the outlined region by copy and do not have map
10577       // information associated.
10578       if (CI->capturesVariableArrayType()) {
10579         CurInfo.Exprs.push_back(nullptr);
10580         CurInfo.BasePointers.push_back(*CV);
10581         CurInfo.Pointers.push_back(*CV);
10582         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10583             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10584         // Copy to the device as an argument. No need to retrieve it.
10585         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10586                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10587                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10588         CurInfo.Mappers.push_back(nullptr);
10589       } else {
10590         // If we have any information in the map clause, we use it, otherwise we
10591         // just do a default mapping.
10592         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10593         if (!CI->capturesThis())
10594           MappedVarSet.insert(CI->getCapturedVar());
10595         else
10596           MappedVarSet.insert(nullptr);
10597         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10598           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10599         // Generate correct mapping for variables captured by reference in
10600         // lambdas.
10601         if (CI->capturesVariable())
10602           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10603                                                   CurInfo, LambdaPointers);
10604       }
10605       // We expect to have at least an element of information for this capture.
10606       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10607              "Non-existing map pointer for capture!");
10608       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10609              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10610              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10611              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10612              "Inconsistent map information sizes!");
10613 
10614       // If there is an entry in PartialStruct it means we have a struct with
10615       // individual members mapped. Emit an extra combined entry.
10616       if (PartialStruct.Base.isValid()) {
10617         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10618         MEHandler.emitCombinedEntry(
10619             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10620             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10621       }
10622 
10623       // We need to append the results of this capture to what we already have.
10624       CombinedInfo.append(CurInfo);
10625     }
10626     // Adjust MEMBER_OF flags for the lambdas captures.
10627     MEHandler.adjustMemberOfForLambdaCaptures(
10628         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10629         CombinedInfo.Types);
10630     // Map any list items in a map clause that were not captures because they
10631     // weren't referenced within the construct.
10632     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10633 
10634     TargetDataInfo Info;
10635     // Fill up the arrays and create the arguments.
10636     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10637     emitOffloadingArraysArgument(
10638         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10639         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10640         {/*ForEndCall=*/false});
10641 
10642     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10643     InputInfo.BasePointersArray =
10644         Address::deprecated(Info.BasePointersArray, CGM.getPointerAlign());
10645     InputInfo.PointersArray =
10646         Address::deprecated(Info.PointersArray, CGM.getPointerAlign());
10647     InputInfo.SizesArray =
10648         Address::deprecated(Info.SizesArray, CGM.getPointerAlign());
10649     InputInfo.MappersArray =
10650         Address::deprecated(Info.MappersArray, CGM.getPointerAlign());
10651     MapTypesArray = Info.MapTypesArray;
10652     MapNamesArray = Info.MapNamesArray;
10653     if (RequiresOuterTask)
10654       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10655     else
10656       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10657   };
10658 
10659   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10660                              CodeGenFunction &CGF, PrePostActionTy &) {
10661     if (RequiresOuterTask) {
10662       CodeGenFunction::OMPTargetDataInfo InputInfo;
10663       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10664     } else {
10665       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10666     }
10667   };
10668 
10669   // If we have a target function ID it means that we need to support
10670   // offloading, otherwise, just execute on the host. We need to execute on host
10671   // regardless of the conditional in the if clause if, e.g., the user do not
10672   // specify target triples.
10673   if (OutlinedFnID) {
10674     if (IfCond) {
10675       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10676     } else {
10677       RegionCodeGenTy ThenRCG(TargetThenGen);
10678       ThenRCG(CGF);
10679     }
10680   } else {
10681     RegionCodeGenTy ElseRCG(TargetElseGen);
10682     ElseRCG(CGF);
10683   }
10684 }
10685 
/// Recursively scan statement \p S for OpenMP target execution directives and
/// emit the device entry point for each one found. \p ParentName is the
/// mangled name of the enclosing host function/ctor/dtor; it participates in
/// the unique offload-entry identification of each target region.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // Build the (device-id, file-id, line) triple that uniquely identifies
    // this target region's offload entry.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter matching the exact combined
    // directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the remaining directive kinds is a target execution directive,
    // so reaching this switch with one of them is a bug (the
    // RequiresDeviceCodegen guard above should have filtered them out).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // For any other OpenMP directive, recurse into the raw (uncaptured)
  // associated statement, if there is one.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10836 
10837 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10838   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10839       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10840   if (!DevTy)
10841     return false;
10842   // Do not emit device_type(nohost) functions for the host.
10843   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10844     return true;
10845   // Do not emit device_type(host) functions for the device.
10846   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10847     return true;
10848   return false;
10849 }
10850 
10851 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10852   // If emitting code for the host, we do not process FD here. Instead we do
10853   // the normal code generation.
10854   if (!CGM.getLangOpts().OpenMPIsDevice) {
10855     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10856       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10857                                   CGM.getLangOpts().OpenMPIsDevice))
10858         return true;
10859     return false;
10860   }
10861 
10862   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10863   // Try to detect target regions in the function.
10864   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10865     StringRef Name = CGM.getMangledName(GD);
10866     scanForTargetRegionsFunctions(FD->getBody(), Name);
10867     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10868                                 CGM.getLangOpts().OpenMPIsDevice))
10869       return true;
10870   }
10871 
10872   // Do not to emit function if it is not marked as declare target.
10873   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10874          AlreadyEmittedTargetDecls.count(VD) == 0;
10875 }
10876 
/// Decide whether emission of the global variable \p GD must be skipped or
/// deferred for this compilation. Returns true when the variable should NOT
/// be emitted by the regular path right now.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  // Skip variables restricted to the other compilation side by a
  // device_type clause.
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  // Host compilation emits globals normally.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not to emit variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  // Non-declare-target variables, 'link' variables, and 'to' variables under
  // unified shared memory are deferred: emitDeferredTargetDecls() revisits
  // them at the end of the TU.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}
10914 
10915 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10916                                                    llvm::Constant *Addr) {
10917   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10918       !CGM.getLangOpts().OpenMPIsDevice)
10919     return;
10920 
10921   // If we have host/nohost variables, they do not need to be registered.
10922   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10923       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10924   if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
10925     return;
10926 
10927   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10928       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10929   if (!Res) {
10930     if (CGM.getLangOpts().OpenMPIsDevice) {
10931       // Register non-target variables being emitted in device code (debug info
10932       // may cause this).
10933       StringRef VarName = CGM.getMangledName(VD);
10934       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10935     }
10936     return;
10937   }
10938   // Register declare target variables.
10939   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10940   StringRef VarName;
10941   CharUnits VarSize;
10942   llvm::GlobalValue::LinkageTypes Linkage;
10943 
10944   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10945       !HasRequiresUnifiedSharedMemory) {
10946     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10947     VarName = CGM.getMangledName(VD);
10948     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10949       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10950       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10951     } else {
10952       VarSize = CharUnits::Zero();
10953     }
10954     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10955     // Temp solution to prevent optimizations of the internal variables.
10956     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10957       // Do not create a "ref-variable" if the original is not also available
10958       // on the host.
10959       if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10960         return;
10961       std::string RefName = getName({VarName, "ref"});
10962       if (!CGM.GetGlobalValue(RefName)) {
10963         llvm::Constant *AddrRef =
10964             getOrCreateInternalVariable(Addr->getType(), RefName);
10965         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10966         GVAddrRef->setConstant(/*Val=*/true);
10967         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10968         GVAddrRef->setInitializer(Addr);
10969         CGM.addCompilerUsedGlobal(GVAddrRef);
10970       }
10971     }
10972   } else {
10973     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10974             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10975              HasRequiresUnifiedSharedMemory)) &&
10976            "Declare target attribute must link or to with unified memory.");
10977     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10978       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10979     else
10980       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10981 
10982     if (CGM.getLangOpts().OpenMPIsDevice) {
10983       VarName = Addr->getName();
10984       Addr = nullptr;
10985     } else {
10986       VarName = getAddrOfDeclareTargetVar(VD).getName();
10987       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10988     }
10989     VarSize = CGM.getPointerSize();
10990     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10991   }
10992 
10993   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10994       VarName, Addr, VarSize, Flags, Linkage);
10995 }
10996 
10997 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10998   if (isa<FunctionDecl>(GD.getDecl()) ||
10999       isa<OMPDeclareReductionDecl>(GD.getDecl()))
11000     return emitTargetFunctions(GD);
11001 
11002   return emitTargetGlobalVariable(GD);
11003 }
11004 
11005 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
11006   for (const VarDecl *VD : DeferredGlobalVariables) {
11007     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11008         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
11009     if (!Res)
11010       continue;
11011     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
11012         !HasRequiresUnifiedSharedMemory) {
11013       CGM.EmitGlobal(VD);
11014     } else {
11015       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
11016               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
11017                HasRequiresUnifiedSharedMemory)) &&
11018              "Expected link clause or to clause with unified memory.");
11019       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
11020     }
11021   }
11022 }
11023 
11024 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
11025     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
11026   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
11027          " Expected target-based directive.");
11028 }
11029 
11030 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
11031   for (const OMPClause *Clause : D->clauselists()) {
11032     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
11033       HasRequiresUnifiedSharedMemory = true;
11034     } else if (const auto *AC =
11035                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
11036       switch (AC->getAtomicDefaultMemOrderKind()) {
11037       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
11038         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
11039         break;
11040       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
11041         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
11042         break;
11043       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
11044         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
11045         break;
11046       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
11047         break;
11048       }
11049     }
11050   }
11051 }
11052 
/// Returns the default atomic ordering, as last recorded by
/// processRequiresDirective() from a 'requires atomic_default_mem_order'
/// clause (or the member's initial value if no such clause was seen).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
11056 
11057 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11058                                                        LangAS &AS) {
11059   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11060     return false;
11061   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11062   switch(A->getAllocatorType()) {
11063   case OMPAllocateDeclAttr::OMPNullMemAlloc:
11064   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11065   // Not supported, fallback to the default mem space.
11066   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11067   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11068   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11069   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11070   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11071   case OMPAllocateDeclAttr::OMPConstMemAlloc:
11072   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11073     AS = LangAS::Default;
11074     return true;
11075   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11076     llvm_unreachable("Expected predefined allocator for the variables with the "
11077                      "static storage.");
11078   }
11079   return false;
11080 }
11081 
/// Returns true if a 'requires unified_shared_memory' clause was seen (set by
/// processRequiresDirective()).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
11085 
11086 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11087     CodeGenModule &CGM)
11088     : CGM(CGM) {
11089   if (CGM.getLangOpts().OpenMPIsDevice) {
11090     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11091     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11092   }
11093 }
11094 
11095 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11096   if (CGM.getLangOpts().OpenMPIsDevice)
11097     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11098 }
11099 
11100 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
11101   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
11102     return true;
11103 
11104   const auto *D = cast<FunctionDecl>(GD.getDecl());
11105   // Do not to emit function if it is marked as declare target as it was already
11106   // emitted.
11107   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11108     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11109       if (auto *F = dyn_cast_or_null<llvm::Function>(
11110               CGM.GetGlobalValue(CGM.getMangledName(GD))))
11111         return !F->isDeclaration();
11112       return false;
11113     }
11114     return true;
11115   }
11116 
11117   return !AlreadyEmittedTargetDecls.insert(D).second;
11118 }
11119 
/// Create the global constructor-style function that registers the 'requires'
/// flags with the offload runtime (__tgt_register_requires). Returns nullptr
/// when no registration is needed for this TU.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    // Build a void() function named "omp_offloading.requires_reg" whose body
    // is a single runtime call passing the collected requires flags.
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
11161 
11162 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11163                                     const OMPExecutableDirective &D,
11164                                     SourceLocation Loc,
11165                                     llvm::Function *OutlinedFn,
11166                                     ArrayRef<llvm::Value *> CapturedVars) {
11167   if (!CGF.HaveInsertPoint())
11168     return;
11169 
11170   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11171   CodeGenFunction::RunCleanupsScope Scope(CGF);
11172 
11173   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11174   llvm::Value *Args[] = {
11175       RTLoc,
11176       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11177       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11178   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11179   RealArgs.append(std::begin(Args), std::end(Args));
11180   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11181 
11182   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11183       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11184   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11185 }
11186 
11187 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11188                                          const Expr *NumTeams,
11189                                          const Expr *ThreadLimit,
11190                                          SourceLocation Loc) {
11191   if (!CGF.HaveInsertPoint())
11192     return;
11193 
11194   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11195 
11196   llvm::Value *NumTeamsVal =
11197       NumTeams
11198           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11199                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11200           : CGF.Builder.getInt32(0);
11201 
11202   llvm::Value *ThreadLimitVal =
11203       ThreadLimit
11204           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11205                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11206           : CGF.Builder.getInt32(0);
11207 
11208   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11209   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11210                                      ThreadLimitVal};
11211   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11212                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11213                       PushNumTeamsArgs);
11214 }
11215 
11216 void CGOpenMPRuntime::emitTargetDataCalls(
11217     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11218     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11219   if (!CGF.HaveInsertPoint())
11220     return;
11221 
11222   // Action used to replace the default codegen action and turn privatization
11223   // off.
11224   PrePostActionTy NoPrivAction;
11225 
11226   // Generate the code for the opening of the data environment. Capture all the
11227   // arguments of the runtime call by reference because they are used in the
11228   // closing of the region.
11229   auto &&BeginThenGen = [this, &D, Device, &Info,
11230                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11231     // Fill up the arrays with all the mapped variables.
11232     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11233 
11234     // Get map clause information.
11235     MappableExprsHandler MEHandler(D, CGF);
11236     MEHandler.generateAllInfo(CombinedInfo);
11237 
11238     // Fill up the arrays and create the arguments.
11239     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11240                          /*IsNonContiguous=*/true);
11241 
11242     llvm::Value *BasePointersArrayArg = nullptr;
11243     llvm::Value *PointersArrayArg = nullptr;
11244     llvm::Value *SizesArrayArg = nullptr;
11245     llvm::Value *MapTypesArrayArg = nullptr;
11246     llvm::Value *MapNamesArrayArg = nullptr;
11247     llvm::Value *MappersArrayArg = nullptr;
11248     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11249                                  SizesArrayArg, MapTypesArrayArg,
11250                                  MapNamesArrayArg, MappersArrayArg, Info);
11251 
11252     // Emit device ID if any.
11253     llvm::Value *DeviceID = nullptr;
11254     if (Device) {
11255       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11256                                            CGF.Int64Ty, /*isSigned=*/true);
11257     } else {
11258       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11259     }
11260 
11261     // Emit the number of elements in the offloading arrays.
11262     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11263     //
11264     // Source location for the ident struct
11265     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11266 
11267     llvm::Value *OffloadingArgs[] = {RTLoc,
11268                                      DeviceID,
11269                                      PointerNum,
11270                                      BasePointersArrayArg,
11271                                      PointersArrayArg,
11272                                      SizesArrayArg,
11273                                      MapTypesArrayArg,
11274                                      MapNamesArrayArg,
11275                                      MappersArrayArg};
11276     CGF.EmitRuntimeCall(
11277         OMPBuilder.getOrCreateRuntimeFunction(
11278             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11279         OffloadingArgs);
11280 
11281     // If device pointer privatization is required, emit the body of the region
11282     // here. It will have to be duplicated: with and without privatization.
11283     if (!Info.CaptureDeviceAddrMap.empty())
11284       CodeGen(CGF);
11285   };
11286 
11287   // Generate code for the closing of the data region.
11288   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11289                                                 PrePostActionTy &) {
11290     assert(Info.isValid() && "Invalid data environment closing arguments.");
11291 
11292     llvm::Value *BasePointersArrayArg = nullptr;
11293     llvm::Value *PointersArrayArg = nullptr;
11294     llvm::Value *SizesArrayArg = nullptr;
11295     llvm::Value *MapTypesArrayArg = nullptr;
11296     llvm::Value *MapNamesArrayArg = nullptr;
11297     llvm::Value *MappersArrayArg = nullptr;
11298     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11299                                  SizesArrayArg, MapTypesArrayArg,
11300                                  MapNamesArrayArg, MappersArrayArg, Info,
11301                                  {/*ForEndCall=*/true});
11302 
11303     // Emit device ID if any.
11304     llvm::Value *DeviceID = nullptr;
11305     if (Device) {
11306       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11307                                            CGF.Int64Ty, /*isSigned=*/true);
11308     } else {
11309       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11310     }
11311 
11312     // Emit the number of elements in the offloading arrays.
11313     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11314 
11315     // Source location for the ident struct
11316     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11317 
11318     llvm::Value *OffloadingArgs[] = {RTLoc,
11319                                      DeviceID,
11320                                      PointerNum,
11321                                      BasePointersArrayArg,
11322                                      PointersArrayArg,
11323                                      SizesArrayArg,
11324                                      MapTypesArrayArg,
11325                                      MapNamesArrayArg,
11326                                      MappersArrayArg};
11327     CGF.EmitRuntimeCall(
11328         OMPBuilder.getOrCreateRuntimeFunction(
11329             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11330         OffloadingArgs);
11331   };
11332 
11333   // If we need device pointer privatization, we need to emit the body of the
11334   // region with no privatization in the 'else' branch of the conditional.
11335   // Otherwise, we don't have to do anything.
11336   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11337                                                          PrePostActionTy &) {
11338     if (!Info.CaptureDeviceAddrMap.empty()) {
11339       CodeGen.setAction(NoPrivAction);
11340       CodeGen(CGF);
11341     }
11342   };
11343 
11344   // We don't have to do anything to close the region if the if clause evaluates
11345   // to false.
11346   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11347 
11348   if (IfCond) {
11349     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11350   } else {
11351     RegionCodeGenTy RCG(BeginThenGen);
11352     RCG(CGF);
11353   }
11354 
11355   // If we don't require privatization of device pointers, we emit the body in
11356   // between the runtime calls. This avoids duplicating the body code.
11357   if (Info.CaptureDeviceAddrMap.empty()) {
11358     CodeGen.setAction(NoPrivAction);
11359     CodeGen(CGF);
11360   }
11361 
11362   if (IfCond) {
11363     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11364   } else {
11365     RegionCodeGenTy RCG(EndThenGen);
11366     RCG(CGF);
11367   }
11368 }
11369 
/// Emit a standalone target data runtime call for 'target enter data',
/// 'target exit data', or 'target update': materialize the offloading arrays
/// that describe the map clauses, then invoke the matching
/// __tgt_target_data_{begin,end,update}[_nowait]_mapper entry point.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo and the two arrays below are written by TargetThenGen and read
  // by ThenGen (which may be emitted inside a task body when depend/nowait is
  // present), so both lambdas capture them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    // The switch lists every directive kind explicitly so that extending the
    // OMPD_* enumeration surfaces here; only the three standalone target data
    // directives are legal (see the assert above).
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Builds the offloading arrays from the map clauses and then emits ThenGen,
  // either wrapped in an outer task (depend/nowait) or inline.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    // Publish the array pointers for ThenGen through the shared captures.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address::deprecated(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address::deprecated(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address::deprecated(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address::deprecated(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an if clause the runtime call is guarded; the 'else' branch emits
  // nothing for these standalone directives.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11550 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; defaults to Vector (i.e. varying).
    ParamKindTy Kind = Vector;
    /// For linear parameters, the step; used in the vector-name mangling
    /// (omitted there when equal to 1).
    llvm::APSInt StrideOrArg;
    /// Alignment from the 'aligned' clause (or the target's default simd
    /// alignment); zero means "no alignment token" in the mangled name.
    llvm::APSInt Alignment;
  };
} // namespace
11561 
11562 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11563                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11564   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11565   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11566   // of that clause. The VLEN value must be power of 2.
11567   // In other case the notion of the function`s "characteristic data type" (CDT)
11568   // is used to compute the vector length.
11569   // CDT is defined in the following order:
11570   //   a) For non-void function, the CDT is the return type.
11571   //   b) If the function has any non-uniform, non-linear parameters, then the
11572   //   CDT is the type of the first such parameter.
11573   //   c) If the CDT determined by a) or b) above is struct, union, or class
11574   //   type which is pass-by-value (except for the type that maps to the
11575   //   built-in complex data type), the characteristic data type is int.
11576   //   d) If none of the above three cases is applicable, the CDT is int.
11577   // The VLEN is then determined based on the CDT and the size of vector
11578   // register of that ISA for which current vector version is generated. The
11579   // VLEN is computed using the formula below:
11580   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11581   // where vector register size specified in section 3.2.1 Registers and the
11582   // Stack Frame of original AMD64 ABI document.
11583   QualType RetType = FD->getReturnType();
11584   if (RetType.isNull())
11585     return 0;
11586   ASTContext &C = FD->getASTContext();
11587   QualType CDT;
11588   if (!RetType.isNull() && !RetType->isVoidType()) {
11589     CDT = RetType;
11590   } else {
11591     unsigned Offset = 0;
11592     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11593       if (ParamAttrs[Offset].Kind == Vector)
11594         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11595       ++Offset;
11596     }
11597     if (CDT.isNull()) {
11598       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11599         if (ParamAttrs[I + Offset].Kind == Vector) {
11600           CDT = FD->getParamDecl(I)->getType();
11601           break;
11602         }
11603       }
11604     }
11605   }
11606   if (CDT.isNull())
11607     CDT = C.IntTy;
11608   CDT = CDT->getCanonicalTypeUnqualified();
11609   if (CDT->isRecordType() || CDT->isUnionType())
11610     CDT = C.IntTy;
11611   return C.getTypeSize(CDT);
11612 }
11613 
11614 static void
11615 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11616                            const llvm::APSInt &VLENVal,
11617                            ArrayRef<ParamAttrTy> ParamAttrs,
11618                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11619   struct ISADataTy {
11620     char ISA;
11621     unsigned VecRegSize;
11622   };
11623   ISADataTy ISAData[] = {
11624       {
11625           'b', 128
11626       }, // SSE
11627       {
11628           'c', 256
11629       }, // AVX
11630       {
11631           'd', 256
11632       }, // AVX2
11633       {
11634           'e', 512
11635       }, // AVX512
11636   };
11637   llvm::SmallVector<char, 2> Masked;
11638   switch (State) {
11639   case OMPDeclareSimdDeclAttr::BS_Undefined:
11640     Masked.push_back('N');
11641     Masked.push_back('M');
11642     break;
11643   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11644     Masked.push_back('N');
11645     break;
11646   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11647     Masked.push_back('M');
11648     break;
11649   }
11650   for (char Mask : Masked) {
11651     for (const ISADataTy &Data : ISAData) {
11652       SmallString<256> Buffer;
11653       llvm::raw_svector_ostream Out(Buffer);
11654       Out << "_ZGV" << Data.ISA << Mask;
11655       if (!VLENVal) {
11656         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11657         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11658         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11659       } else {
11660         Out << VLENVal;
11661       }
11662       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11663         switch (ParamAttr.Kind){
11664         case LinearWithVarStride:
11665           Out << 's' << ParamAttr.StrideOrArg;
11666           break;
11667         case Linear:
11668           Out << 'l';
11669           if (ParamAttr.StrideOrArg != 1)
11670             Out << ParamAttr.StrideOrArg;
11671           break;
11672         case Uniform:
11673           Out << 'u';
11674           break;
11675         case Vector:
11676           Out << 'v';
11677           break;
11678         }
11679         if (!!ParamAttr.Alignment)
11680           Out << 'a' << ParamAttr.Alignment;
11681       }
11682       Out << '_' << Fn->getName();
11683       Fn->addFnAttr(Out.str());
11684     }
11685   }
11686 }
11687 
// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11693 
11694 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11695 ///
11696 /// TODO: Need to implement the behavior for reference marked with a
11697 /// var or no linear modifiers (1.b in the section). For this, we
11698 /// need to extend ParamKindTy to support the linear modifiers.
11699 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11700   QT = QT.getCanonicalType();
11701 
11702   if (QT->isVoidType())
11703     return false;
11704 
11705   if (Kind == ParamKindTy::Uniform)
11706     return false;
11707 
11708   if (Kind == ParamKindTy::Linear)
11709     return false;
11710 
11711   // TODO: Handle linear references with modifiers
11712 
11713   if (Kind == ParamKindTy::LinearWithVarStride)
11714     return false;
11715 
11716   return true;
11717 }
11718 
11719 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11720 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11721   QT = QT.getCanonicalType();
11722   unsigned Size = C.getTypeSize(QT);
11723 
11724   // Only scalars and complex within 16 bytes wide set PVB to true.
11725   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11726     return false;
11727 
11728   if (QT->isFloatingType())
11729     return true;
11730 
11731   if (QT->isIntegerType())
11732     return true;
11733 
11734   if (QT->isPointerType())
11735     return true;
11736 
11737   // TODO: Add support for complex types (section 3.1.2, item 2).
11738 
11739   return false;
11740 }
11741 
11742 /// Computes the lane size (LS) of a return type or of an input parameter,
11743 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11744 /// TODO: Add support for references, section 3.2.1, item 1.
11745 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11746   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11747     QualType PTy = QT.getCanonicalType()->getPointeeType();
11748     if (getAArch64PBV(PTy, C))
11749       return C.getTypeSize(PTy);
11750   }
11751   if (getAArch64PBV(QT, C))
11752     return C.getTypeSize(QT);
11753 
11754   return C.getTypeSize(C.getUIntPtrType());
11755 }
11756 
11757 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11758 // signature of the scalar function, as defined in 3.2.2 of the
11759 // AAVFABI.
11760 static std::tuple<unsigned, unsigned, bool>
11761 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11762   QualType RetType = FD->getReturnType().getCanonicalType();
11763 
11764   ASTContext &C = FD->getASTContext();
11765 
11766   bool OutputBecomesInput = false;
11767 
11768   llvm::SmallVector<unsigned, 8> Sizes;
11769   if (!RetType->isVoidType()) {
11770     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11771     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11772       OutputBecomesInput = true;
11773   }
11774   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11775     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11776     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11777   }
11778 
11779   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11780   // The LS of a function parameter / return value can only be a power
11781   // of 2, starting from 8 bits, up to 128.
11782   assert(llvm::all_of(Sizes,
11783                       [](unsigned Size) {
11784                         return Size == 8 || Size == 16 || Size == 32 ||
11785                                Size == 64 || Size == 128;
11786                       }) &&
11787          "Invalid size");
11788 
11789   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11790                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11791                          OutputBecomesInput);
11792 }
11793 
11794 /// Mangle the parameter part of the vector function name according to
11795 /// their OpenMP classification. The mangling function is defined in
11796 /// section 3.5 of the AAVFABI.
11797 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11798   SmallString<256> Buffer;
11799   llvm::raw_svector_ostream Out(Buffer);
11800   for (const auto &ParamAttr : ParamAttrs) {
11801     switch (ParamAttr.Kind) {
11802     case LinearWithVarStride:
11803       Out << "ls" << ParamAttr.StrideOrArg;
11804       break;
11805     case Linear:
11806       Out << 'l';
11807       // Don't print the step value if it is not present or if it is
11808       // equal to 1.
11809       if (ParamAttr.StrideOrArg != 1)
11810         Out << ParamAttr.StrideOrArg;
11811       break;
11812     case Uniform:
11813       Out << 'u';
11814       break;
11815     case Vector:
11816       Out << 'v';
11817       break;
11818     }
11819 
11820     if (!!ParamAttr.Alignment)
11821       Out << 'a' << ParamAttr.Alignment;
11822   }
11823 
11824   return std::string(Out.str());
11825 }
11826 
11827 // Function used to add the attribute. The parameter `VLEN` is
11828 // templated to allow the use of "x" when targeting scalable functions
11829 // for SVE.
11830 template <typename T>
11831 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11832                                  char ISA, StringRef ParSeq,
11833                                  StringRef MangledName, bool OutputBecomesInput,
11834                                  llvm::Function *Fn) {
11835   SmallString<256> Buffer;
11836   llvm::raw_svector_ostream Out(Buffer);
11837   Out << Prefix << ISA << LMask << VLEN;
11838   if (OutputBecomesInput)
11839     Out << "v";
11840   Out << ParSeq << "_" << MangledName;
11841   Fn->addFnAttr(Out.str());
11842 }
11843 
11844 // Helper function to generate the Advanced SIMD names depending on
11845 // the value of the NDS when simdlen is not present.
11846 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11847                                       StringRef Prefix, char ISA,
11848                                       StringRef ParSeq, StringRef MangledName,
11849                                       bool OutputBecomesInput,
11850                                       llvm::Function *Fn) {
11851   switch (NDS) {
11852   case 8:
11853     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11854                          OutputBecomesInput, Fn);
11855     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11856                          OutputBecomesInput, Fn);
11857     break;
11858   case 16:
11859     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11860                          OutputBecomesInput, Fn);
11861     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11862                          OutputBecomesInput, Fn);
11863     break;
11864   case 32:
11865     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11866                          OutputBecomesInput, Fn);
11867     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11868                          OutputBecomesInput, Fn);
11869     break;
11870   case 64:
11871   case 128:
11872     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11873                          OutputBecomesInput, Fn);
11874     break;
11875   default:
11876     llvm_unreachable("Scalar type is too wide.");
11877   }
11878 }
11879 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// Validates a user-provided 'simdlen' (warning and bailing out on invalid
/// values), then attaches one mangled vector-variant attribute per required
/// (mask, VLEN) combination to \p Fn.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        // No branch-state clause: emit both unmasked and masked variants.
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable variants use the "x" VLEN token.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11988 
11989 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11990                                               llvm::Function *Fn) {
11991   ASTContext &C = CGM.getContext();
11992   FD = FD->getMostRecentDecl();
11993   // Map params to their positions in function decl.
11994   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11995   if (isa<CXXMethodDecl>(FD))
11996     ParamPositions.try_emplace(FD, 0);
11997   unsigned ParamPos = ParamPositions.size();
11998   for (const ParmVarDecl *P : FD->parameters()) {
11999     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
12000     ++ParamPos;
12001   }
12002   while (FD) {
12003     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
12004       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
12005       // Mark uniform parameters.
12006       for (const Expr *E : Attr->uniforms()) {
12007         E = E->IgnoreParenImpCasts();
12008         unsigned Pos;
12009         if (isa<CXXThisExpr>(E)) {
12010           Pos = ParamPositions[FD];
12011         } else {
12012           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12013                                 ->getCanonicalDecl();
12014           Pos = ParamPositions[PVD];
12015         }
12016         ParamAttrs[Pos].Kind = Uniform;
12017       }
12018       // Get alignment info.
12019       auto *NI = Attr->alignments_begin();
12020       for (const Expr *E : Attr->aligneds()) {
12021         E = E->IgnoreParenImpCasts();
12022         unsigned Pos;
12023         QualType ParmTy;
12024         if (isa<CXXThisExpr>(E)) {
12025           Pos = ParamPositions[FD];
12026           ParmTy = E->getType();
12027         } else {
12028           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12029                                 ->getCanonicalDecl();
12030           Pos = ParamPositions[PVD];
12031           ParmTy = PVD->getType();
12032         }
12033         ParamAttrs[Pos].Alignment =
12034             (*NI)
12035                 ? (*NI)->EvaluateKnownConstInt(C)
12036                 : llvm::APSInt::getUnsigned(
12037                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
12038                           .getQuantity());
12039         ++NI;
12040       }
12041       // Mark linear parameters.
12042       auto *SI = Attr->steps_begin();
12043       auto *MI = Attr->modifiers_begin();
12044       for (const Expr *E : Attr->linears()) {
12045         E = E->IgnoreParenImpCasts();
12046         unsigned Pos;
12047         // Rescaling factor needed to compute the linear parameter
12048         // value in the mangled name.
12049         unsigned PtrRescalingFactor = 1;
12050         if (isa<CXXThisExpr>(E)) {
12051           Pos = ParamPositions[FD];
12052         } else {
12053           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12054                                 ->getCanonicalDecl();
12055           Pos = ParamPositions[PVD];
12056           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12057             PtrRescalingFactor = CGM.getContext()
12058                                      .getTypeSizeInChars(P->getPointeeType())
12059                                      .getQuantity();
12060         }
12061         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12062         ParamAttr.Kind = Linear;
12063         // Assuming a stride of 1, for `linear` without modifiers.
12064         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12065         if (*SI) {
12066           Expr::EvalResult Result;
12067           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12068             if (const auto *DRE =
12069                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12070               if (const auto *StridePVD =
12071                       dyn_cast<ParmVarDecl>(DRE->getDecl())) {
12072                 ParamAttr.Kind = LinearWithVarStride;
12073                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
12074                     ParamPositions[StridePVD->getCanonicalDecl()]);
12075               }
12076             }
12077           } else {
12078             ParamAttr.StrideOrArg = Result.Val.getInt();
12079           }
12080         }
12081         // If we are using a linear clause on a pointer, we need to
12082         // rescale the value of linear_step with the byte size of the
12083         // pointee type.
12084         if (Linear == ParamAttr.Kind)
12085           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12086         ++SI;
12087         ++MI;
12088       }
12089       llvm::APSInt VLENVal;
12090       SourceLocation ExprLoc;
12091       const Expr *VLENExpr = Attr->getSimdlen();
12092       if (VLENExpr) {
12093         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12094         ExprLoc = VLENExpr->getExprLoc();
12095       }
12096       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12097       if (CGM.getTriple().isX86()) {
12098         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12099       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12100         unsigned VLEN = VLENVal.getExtValue();
12101         StringRef MangledName = Fn->getName();
12102         if (CGM.getTarget().hasFeature("sve"))
12103           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12104                                          MangledName, 's', 128, Fn, ExprLoc);
12105         if (CGM.getTarget().hasFeature("neon"))
12106           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12107                                          MangledName, 'n', 128, Fn, ExprLoc);
12108       }
12109     }
12110     FD = FD->getPreviousDecl();
12111   }
12112 }
12113 
12114 namespace {
12115 /// Cleanup action for doacross support.
12116 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12117 public:
12118   static const int DoacrossFinArgs = 2;
12119 
12120 private:
12121   llvm::FunctionCallee RTLFn;
12122   llvm::Value *Args[DoacrossFinArgs];
12123 
12124 public:
12125   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12126                     ArrayRef<llvm::Value *> CallArgs)
12127       : RTLFn(RTLFn) {
12128     assert(CallArgs.size() == DoacrossFinArgs);
12129     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12130   }
12131   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12132     if (!CGF.HaveInsertPoint())
12133       return;
12134     CGF.EmitRuntimeCall(RTLFn, Args);
12135   }
12136 };
12137 } // namespace
12138 
// Emits the doacross-loop initialization for loop directive \p D: fills a
// stack array of kmp_dim descriptors (one per item of \p NumIterations) and
// calls __kmpc_doacross_init; the matching __kmpc_doacross_fini is scheduled
// as a scope cleanup.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    // Cache the record type; later doacross loops reuse it.
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // kmp_dim dims[n] = {}; the lower bounds stay 0 from null-initialization.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register __kmpc_doacross_fini(loc, gtid) as a cleanup so it runs on both
  // normal and EH exits from the scope.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
12209 
12210 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12211                                           const OMPDependClause *C) {
12212   QualType Int64Ty =
12213       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12214   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12215   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12216       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12217   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12218   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12219     const Expr *CounterVal = C->getLoopData(I);
12220     assert(CounterVal);
12221     llvm::Value *CntVal = CGF.EmitScalarConversion(
12222         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12223         CounterVal->getExprLoc());
12224     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12225                           /*Volatile=*/false, Int64Ty);
12226   }
12227   llvm::Value *Args[] = {
12228       emitUpdateLocation(CGF, C->getBeginLoc()),
12229       getThreadID(CGF, C->getBeginLoc()),
12230       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12231   llvm::FunctionCallee RTLFn;
12232   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12233     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12234                                                   OMPRTL___kmpc_doacross_post);
12235   } else {
12236     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12237     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12238                                                   OMPRTL___kmpc_doacross_wait);
12239   }
12240   CGF.EmitRuntimeCall(RTLFn, Args);
12241 }
12242 
12243 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12244                                llvm::FunctionCallee Callee,
12245                                ArrayRef<llvm::Value *> Args) const {
12246   assert(Loc.isValid() && "Outlined function call location must be valid.");
12247   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12248 
12249   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12250     if (Fn->doesNotThrow()) {
12251       CGF.EmitNounwindRuntimeCall(Fn, Args);
12252       return;
12253     }
12254   }
12255   CGF.EmitRuntimeCall(Callee, Args);
12256 }
12257 
// Emits a call to an outlined OpenMP region function; this base
// implementation simply forwards to emitCall().
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12263 
12264 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12265   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12266     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12267       HasEmittedDeclareTargetRegion = true;
12268 }
12269 
// Host-side default: the native parameter is the actual local variable, so
// \p TargetParam is intentionally unused here (presumably device runtimes
// override this to map between native and target parameters -- confirm with
// the derived-class implementations).
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12275 
12276 /// Return allocator value from expression, or return a null allocator (default
12277 /// when no allocator specified).
12278 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12279                                     const Expr *Allocator) {
12280   llvm::Value *AllocVal;
12281   if (Allocator) {
12282     AllocVal = CGF.EmitScalarExpr(Allocator);
12283     // According to the standard, the original allocator type is a enum
12284     // (integer). Convert to pointer type, if required.
12285     AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12286                                         CGF.getContext().VoidPtrTy,
12287                                         Allocator->getExprLoc());
12288   } else {
12289     // If no allocator specified, it defaults to the null allocator.
12290     AllocVal = llvm::Constant::getNullValue(
12291         CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
12292   }
12293   return AllocVal;
12294 }
12295 
// Returns the address to use for local variable \p VD, handling both
// untied-task locals (which have dedicated storage recorded on the
// untied-locals stack) and variables annotated with '#pragma omp allocate'
// (allocated via __kmpc_alloc/__kmpc_aligned_alloc and freed via a cleanup).
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  // Look up storage recorded for this variable when it is a local of an
  // untied task in the current function.
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // Variably-modified type: the size is only known at runtime.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    // An explicit 'align' modifier selects __kmpc_aligned_alloc below and is
    // passed as an extra argument.
    llvm::Value *Alignment =
        AA->getAlignment()
            ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
                                        CGM.SizeTy, /*isSigned=*/false)
            : nullptr;
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    // Calls __kmpc_free(gtid, addr, allocator) on scope exit, re-evaluating
    // the allocator expression so the same allocator is passed to free.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // For untied tasks the "real" address recorded on the stack wins over the
    // freshly allocated one.
    Address VDAddr = UntiedRealAddr.isValid()
                         ? UntiedRealAddr
                         : Address::deprecated(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12398 
12399 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12400                                              const VarDecl *VD) const {
12401   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12402   if (It == FunctionToUntiedTaskStackMap.end())
12403     return false;
12404   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12405 }
12406 
12407 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12408     CodeGenModule &CGM, const OMPLoopDirective &S)
12409     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12410   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12411   if (!NeedToPush)
12412     return;
12413   NontemporalDeclsSet &DS =
12414       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12415   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12416     for (const Stmt *Ref : C->private_refs()) {
12417       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12418       const ValueDecl *VD;
12419       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12420         VD = DRE->getDecl();
12421       } else {
12422         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12423         assert((ME->isImplicitCXXThis() ||
12424                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12425                "Expected member of current class.");
12426         VD = ME->getMemberDecl();
12427       }
12428       DS.insert(VD);
12429     }
12430   }
12431 }
12432 
12433 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12434   if (!NeedToPush)
12435     return;
12436   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12437 }
12438 
12439 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12440     CodeGenFunction &CGF,
12441     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12442                           std::pair<Address, Address>> &LocalVars)
12443     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12444   if (!NeedToPush)
12445     return;
12446   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12447       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12448   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12449 }
12450 
12451 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12452   if (!NeedToPush)
12453     return;
12454   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12455 }
12456 
12457 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12458   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12459 
12460   return llvm::any_of(
12461       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12462       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12463 }
12464 
12465 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12466     const OMPExecutableDirective &S,
12467     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12468     const {
12469   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12470   // Vars in target/task regions must be excluded completely.
12471   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12472       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12473     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12474     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12475     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12476     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12477       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12478         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12479     }
12480   }
12481   // Exclude vars in private clauses.
12482   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12483     for (const Expr *Ref : C->varlists()) {
12484       if (!Ref->getType()->isScalarType())
12485         continue;
12486       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12487       if (!DRE)
12488         continue;
12489       NeedToCheckForLPCs.insert(DRE->getDecl());
12490     }
12491   }
12492   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12493     for (const Expr *Ref : C->varlists()) {
12494       if (!Ref->getType()->isScalarType())
12495         continue;
12496       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12497       if (!DRE)
12498         continue;
12499       NeedToCheckForLPCs.insert(DRE->getDecl());
12500     }
12501   }
12502   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12503     for (const Expr *Ref : C->varlists()) {
12504       if (!Ref->getType()->isScalarType())
12505         continue;
12506       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12507       if (!DRE)
12508         continue;
12509       NeedToCheckForLPCs.insert(DRE->getDecl());
12510     }
12511   }
12512   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12513     for (const Expr *Ref : C->varlists()) {
12514       if (!Ref->getType()->isScalarType())
12515         continue;
12516       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12517       if (!DRE)
12518         continue;
12519       NeedToCheckForLPCs.insert(DRE->getDecl());
12520     }
12521   }
12522   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12523     for (const Expr *Ref : C->varlists()) {
12524       if (!Ref->getType()->isScalarType())
12525         continue;
12526       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12527       if (!DRE)
12528         continue;
12529       NeedToCheckForLPCs.insert(DRE->getDecl());
12530     }
12531   }
12532   for (const Decl *VD : NeedToCheckForLPCs) {
12533     for (const LastprivateConditionalData &Data :
12534          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12535       if (Data.DeclToUniqueName.count(VD) > 0) {
12536         if (!Data.Disabled)
12537           NeedToAddForLPCsAsDisabled.insert(VD);
12538         break;
12539       }
12540     }
12541   }
12542 }
12543 
// Pushes a lastprivate-conditional tracking record for directive \p S if (and
// only if) OpenMP >= 5.0 and \p S has a 'lastprivate(conditional: ...)'
// clause; the destructor pops the record.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Map each conditional lastprivate decl to a unique name (prefix
    // "pl_cond") used for its helper globals.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  // Remember the iteration variable and the function this record belongs to.
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12575 
// "Disable" form of the RAII: pushes the privatized/captured vars of \p S as
// a disabled record so inner code stops updating the enclosing regions'
// lastprivate conditionals for them. Does nothing when no such vars exist or
// for OpenMP < 5.0.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // Unique names are irrelevant for a disabled record; only the decl keys
    // matter for lookups.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
12594 
// Factory for the "disable" form; delegates to the constructor that pushes
// the vars of \p S as disabled for lastprivate-conditional tracking.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12600 
12601 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12602   if (CGM.getLangOpts().OpenMP < 50)
12603     return;
12604   if (Action == ActionToDo::DisableLastprivateConditional) {
12605     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12606            "Expected list of disabled private vars.");
12607     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12608   }
12609   if (Action == ActionToDo::PushAsLastprivateConditional) {
12610     assert(
12611         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12612         "Expected list of lastprivate conditional vars.");
12613     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12614   }
12615 }
12616 
// Creates (or reuses) the helper struct { <VD type> value; char Fired; } for
// a conditional lastprivate variable, zero-initializes the Fired flag, and
// returns the address of the value field to use as VD's private copy. Fired
// presumably gets set when the private copy is updated -- see the
// lastprivate-conditional update logic elsewhere in this file.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  // Per-function cache of the helper record built for each variable.
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use of VD in this function: build the helper record and a stack
    // temporary of that type, then cache everything.
    // NOTE(review): the "lasprivate.conditional" spelling (missing 't') is
    // preserved as-is; it is only an internal implicit-record name.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0: the variable has not been updated yet.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12651 
12652 namespace {
12653 /// Checks if the lastprivate conditional variable is referenced in LHS.
12654 class LastprivateConditionalRefChecker final
12655     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12656   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12657   const Expr *FoundE = nullptr;
12658   const Decl *FoundD = nullptr;
12659   StringRef UniqueDeclName;
12660   LValue IVLVal;
12661   llvm::Function *FoundFn = nullptr;
12662   SourceLocation Loc;
12663 
12664 public:
12665   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12666     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12667          llvm::reverse(LPM)) {
12668       auto It = D.DeclToUniqueName.find(E->getDecl());
12669       if (It == D.DeclToUniqueName.end())
12670         continue;
12671       if (D.Disabled)
12672         return false;
12673       FoundE = E;
12674       FoundD = E->getDecl()->getCanonicalDecl();
12675       UniqueDeclName = It->second;
12676       IVLVal = D.IVLVal;
12677       FoundFn = D.Fn;
12678       break;
12679     }
12680     return FoundE == E;
12681   }
12682   bool VisitMemberExpr(const MemberExpr *E) {
12683     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12684       return false;
12685     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12686          llvm::reverse(LPM)) {
12687       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12688       if (It == D.DeclToUniqueName.end())
12689         continue;
12690       if (D.Disabled)
12691         return false;
12692       FoundE = E;
12693       FoundD = E->getMemberDecl()->getCanonicalDecl();
12694       UniqueDeclName = It->second;
12695       IVLVal = D.IVLVal;
12696       FoundFn = D.Fn;
12697       break;
12698     }
12699     return FoundE == E;
12700   }
12701   bool VisitStmt(const Stmt *S) {
12702     for (const Stmt *Child : S->children()) {
12703       if (!Child)
12704         continue;
12705       if (const auto *E = dyn_cast<Expr>(Child))
12706         if (!E->isGLValue())
12707           continue;
12708       if (Visit(Child))
12709         return true;
12710     }
12711     return false;
12712   }
12713   explicit LastprivateConditionalRefChecker(
12714       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12715       : LPM(LPM) {}
12716   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12717   getFoundData() const {
12718     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12719   }
12720 };
12721 } // namespace
12722 
/// Emits the guarded update of the global "last value" copy of a lastprivate
/// conditional variable:
///   #pragma omp critical(<UniqueDeclName>)
///   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
/// \param IVLVal  LValue of the loop iteration variable.
/// \param UniqueDeclName  Mangled unique name of the conditional variable;
///        also used to name the internal globals and the critical region.
/// \param LVal  Private copy of the variable in the current context.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    // Pick a signed or unsigned compare based on the IV's source type.
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12808 
12809 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12810                                                          const Expr *LHS) {
12811   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12812     return;
12813   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12814   if (!Checker.Visit(LHS))
12815     return;
12816   const Expr *FoundE;
12817   const Decl *FoundD;
12818   StringRef UniqueDeclName;
12819   LValue IVLVal;
12820   llvm::Function *FoundFn;
12821   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12822       Checker.getFoundData();
12823   if (FoundFn != CGF.CurFn) {
12824     // Special codegen for inner parallel regions.
12825     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12826     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12827     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12828            "Lastprivate conditional is not found in outer region.");
12829     QualType StructTy = std::get<0>(It->getSecond());
12830     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12831     LValue PrivLVal = CGF.EmitLValue(FoundE);
12832     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12833         PrivLVal.getAddress(CGF),
12834         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12835         CGF.ConvertTypeForMem(StructTy));
12836     LValue BaseLVal =
12837         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12838     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12839     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12840                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12841                         FiredLVal, llvm::AtomicOrdering::Unordered,
12842                         /*IsVolatile=*/true, /*isInit=*/false);
12843     return;
12844   }
12845 
12846   // Private address of the lastprivate conditional in the current context.
12847   // priv_a
12848   LValue LVal = CGF.EmitLValue(FoundE);
12849   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12850                                    FoundE->getExprLoc());
12851 }
12852 
12853 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12854     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12855     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12856   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12857     return;
12858   auto Range = llvm::reverse(LastprivateConditionalStack);
12859   auto It = llvm::find_if(
12860       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12861   if (It == Range.end() || It->Fn != CGF.CurFn)
12862     return;
12863   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12864   assert(LPCI != LastprivateConditionalToTypes.end() &&
12865          "Lastprivates must be registered already.");
12866   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12867   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12868   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12869   for (const auto &Pair : It->DeclToUniqueName) {
12870     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12871     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12872       continue;
12873     auto I = LPCI->getSecond().find(Pair.first);
12874     assert(I != LPCI->getSecond().end() &&
12875            "Lastprivate must be rehistered already.");
12876     // bool Cmp = priv_a.Fired != 0;
12877     LValue BaseLVal = std::get<3>(I->getSecond());
12878     LValue FiredLVal =
12879         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12880     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12881     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12882     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12883     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12884     // if (Cmp) {
12885     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12886     CGF.EmitBlock(ThenBB);
12887     Address Addr = CGF.GetAddrOfLocalVar(VD);
12888     LValue LVal;
12889     if (VD->getType()->isReferenceType())
12890       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12891                                            AlignmentSource::Decl);
12892     else
12893       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12894                                 AlignmentSource::Decl);
12895     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12896                                      D.getBeginLoc());
12897     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12898     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12899     // }
12900   }
12901 }
12902 
/// After the loop, copies the final value of the lastprivate conditional
/// variable from its internal global copy back into \p PrivLVal.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  // Lastprivate conditionals exist only since OpenMP 5.0.
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
12922 
// Unreachable under -fopenmp-simd: 'parallel' outlining is never requested.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12928 
// Unreachable under -fopenmp-simd: 'teams' outlining is never requested.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12934 
// Unreachable under -fopenmp-simd: 'task' outlining is never requested.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12942 
// Unreachable under -fopenmp-simd: parallel regions are never emitted.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12951 
// Unreachable under -fopenmp-simd: 'critical' regions are never emitted.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12958 
// Unreachable under -fopenmp-simd: 'master' regions are never emitted.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12964 
// Unreachable under -fopenmp-simd: 'masked' regions are never emitted.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12971 
// Unreachable under -fopenmp-simd: 'taskyield' is never emitted.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12976 
// Unreachable under -fopenmp-simd: 'taskgroup' regions are never emitted.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12982 
// Unreachable under -fopenmp-simd: 'single' regions are never emitted.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12990 
// Unreachable under -fopenmp-simd: 'ordered' regions are never emitted.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12997 
// Unreachable under -fopenmp-simd: barriers are never emitted.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13005 
// Unreachable under -fopenmp-simd: dynamic loop scheduling is never emitted.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13012 
// Unreachable under -fopenmp-simd: static loop scheduling is never emitted.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13018 
// Unreachable under -fopenmp-simd: 'distribute' scheduling is never emitted.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13024 
// Unreachable under -fopenmp-simd: ordered-iteration bookkeeping is never
// emitted.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13031 
// Unreachable under -fopenmp-simd: static-schedule finalization is never
// emitted.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13037 
// Unreachable under -fopenmp-simd: dynamic-schedule chunk fetching is never
// emitted.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13045 
// Unreachable under -fopenmp-simd: 'num_threads' is never emitted.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13051 
// Unreachable under -fopenmp-simd: 'proc_bind' is never emitted.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13057 
// Unreachable under -fopenmp-simd: 'threadprivate' access is never requested.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13064 
// Unreachable under -fopenmp-simd: 'threadprivate' definitions are never
// emitted.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13070 
// Unreachable under -fopenmp-simd: artificial threadprivates are never
// requested.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13075 
// Unreachable under -fopenmp-simd: 'flush' is never emitted.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13082 
// Unreachable under -fopenmp-simd: 'task' is never emitted.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13091 
// Unreachable under -fopenmp-simd: 'taskloop' is never emitted.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13098 
// In SIMD-only mode only the simple reduction form is expected; forward it
// to the shared base-class implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
13107 
// Unreachable under -fopenmp-simd: task reductions are never emitted.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13113 
// Unreachable under -fopenmp-simd: task reductions are never emitted.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13119 
// Unreachable under -fopenmp-simd: task reductions are never emitted.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13126 
// Unreachable under -fopenmp-simd: task reductions are never emitted.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13133 
// Unreachable under -fopenmp-simd: 'taskwait' is never emitted.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13139 
// Unreachable under -fopenmp-simd: cancellation points are never emitted.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13145 
// Unreachable under -fopenmp-simd: 'cancel' is never emitted.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13151 
// Unreachable under -fopenmp-simd: 'target' outlining is never requested.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13158 
// Unreachable under -fopenmp-simd: 'target' calls are never emitted.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13168 
// Unreachable under -fopenmp-simd: device-function registration is never
// requested.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13172 
// Unreachable under -fopenmp-simd: device-variable registration is never
// requested.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13176 
// SIMD-only mode does no offload handling of globals: report GD as not
// handled by this runtime.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
13180 
// Unreachable under -fopenmp-simd: 'teams' calls are never emitted.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13188 
// Unreachable under -fopenmp-simd: 'num_teams'/'thread_limit' are never
// emitted.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13195 
// Unreachable under -fopenmp-simd: 'target data' is never emitted.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13201 
// Unreachable under -fopenmp-simd: standalone target-data directives are
// never emitted.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13207 
// Unreachable under -fopenmp-simd: doacross loops are never emitted.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13213 
// Unreachable under -fopenmp-simd: doacross dependences are never emitted.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13218 
// Unreachable under -fopenmp-simd: parameter translation for outlined
// target regions is never requested.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13224 
// Unreachable under -fopenmp-simd: translated-parameter addresses are never
// requested.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13231