1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/APValue.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/StmtOpenMP.h"
23 #include "clang/AST/StmtVisitor.h"
24 #include "clang/Basic/BitmaskEnum.h"
25 #include "clang/Basic/FileManager.h"
26 #include "clang/Basic/OpenMPKinds.h"
27 #include "clang/Basic/SourceManager.h"
28 #include "clang/CodeGen/ConstantInitBuilder.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/SetOperations.h"
31 #include "llvm/ADT/StringExtras.h"
32 #include "llvm/Bitcode/BitcodeReader.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info for a region with an associated captured statement
  /// \p CS (outlined regions).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info with no associated captured statement (used by
  /// inlined regions, see CGOpenMPInlinedRegionInfo).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit the next task-switching point for untied tasks. No-op by default;
  /// overridden by task-region infos.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Returns the HasCancel flag supplied at construction (whether the region
  /// supports cancellation).
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: every CR_OpenMP captured-stmt info is one of the
  /// CGOpenMPRegionInfo subclasses.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Which flavor of region this is (see CGOpenMPRegionKind).
  CGOpenMPRegionKind RegionKind;
  /// Code generation sequence for the region body.
  RegionCodeGenTy CodeGen;
  /// The OpenMP directive that introduced this region.
  OpenMPDirectiveKind Kind;
  /// Whether the region supports cancellation.
  bool HasCancel;
};
109 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable holding the global thread id; must be
  /// non-null (asserted below).
  /// \param HelperName Name for the outlined helper function.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the outlined helper function.
  StringRef HelperName;
};
142 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Action that emits the part-id-based switch used to resume untied tasks
  /// at the correct task-switching point.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True if the task is untied (note: the ctor takes 'Tied' and negates).
    bool Untied;
    /// Parameter holding a pointer to the task's current part id.
    const VarDecl *PartIDVar;
    /// Extra codegen to run at every task-switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; one case is added per switching point.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Switch on the loaded part id. The default destination returns from
        // the task; case 0 falls through to the first chunk of the body.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task-switching point: store the next part id, run the
    /// client's untied codegen, return from the task, then register the
    /// resume block as a new case of the switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The next case index becomes the part id at which to resume.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of parts (switch cases) emitted so far.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Forward to the untied-task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
231 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries delegate to the enclosing OpenMP region, if any.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI Captured-stmt info active before this region; restored by
  /// InlinedOpenMPRegionRAII and consulted for all delegated queries.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  /// NOTE(review): unlike the delegating methods above, this consults OldCSI
  /// directly (shadowing the member), so it also works when the enclosing
  /// captured region is not an OpenMP region.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null when the enclosing
  /// region is not an OpenMP one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
314 
315 /// API for captured statement code generation in OpenMP target
316 /// constructs. For this captures, implicit parameters are used instead of the
317 /// captured fields. The name of the target region has to be unique in a given
318 /// application so it is provided by the client, because only the client has
319 /// the information to generate that.
320 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
321 public:
322   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
323                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
324       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
325                            /*HasCancel=*/false),
326         HelperName(HelperName) {}
327 
328   /// This is unused for target regions because each starts executing
329   /// with a single thread.
330   const VarDecl *getThreadIDVariable() const override { return nullptr; }
331 
332   /// Get the name of the capture helper.
333   StringRef getHelperName() const override { return HelperName; }
334 
335   static bool classof(const CGCapturedStmtInfo *Info) {
336     return CGOpenMPRegionInfo::classof(Info) &&
337            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
338   }
339 
340 private:
341   StringRef HelperName;
342 };
343 
/// Placeholder RegionCodeGenTy callback for regions that must never emit a
/// body (used when constructing CGOpenMPInnerExprInfo below).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
347 /// API for generation of expressions captured in a innermost OpenMP
348 /// region.
349 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
350 public:
351   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
352       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
353                                   OMPD_unknown,
354                                   /*HasCancel=*/false),
355         PrivScope(CGF) {
356     // Make sure the globals captured in the provided statement are local by
357     // using the privatization logic. We assume the same variable is not
358     // captured more than once.
359     for (const auto &C : CS.captures()) {
360       if (!C.capturesVariable() && !C.capturesVariableByCopy())
361         continue;
362 
363       const VarDecl *VD = C.getCapturedVar();
364       if (VD->isLocalVarDeclOrParm())
365         continue;
366 
367       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
368                       /*RefersToEnclosingVariableOrCapture=*/false,
369                       VD->getType().getNonReferenceType(), VK_LValue,
370                       C.getLocation());
371       PrivScope.addPrivate(
372           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
373     }
374     (void)PrivScope.Privatize();
375   }
376 
377   /// Lookup the captured field decl for a variable.
378   const FieldDecl *lookup(const VarDecl *VD) const override {
379     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
380       return FD;
381     return nullptr;
382   }
383 
384   /// Emit the captured statement body.
385   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
386     llvm_unreachable("No body for expressions");
387   }
388 
389   /// Get a variable or parameter for storing global thread id
390   /// inside OpenMP construct.
391   const VarDecl *getThreadIDVariable() const override {
392     llvm_unreachable("No thread id for expressions");
393   }
394 
395   /// Get the name of the capture helper.
396   StringRef getHelperName() const override {
397     llvm_unreachable("No helper name for expressions");
398   }
399 
400   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
401 
402 private:
403   /// Private scope to capture global variables.
404   CodeGenFunction::OMPPrivateScope PrivScope;
405 };
406 
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture map, swapped out while the region is emitted.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved 'this' capture field.
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block info.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  /// Whether lambda/block capture state was stashed and must be restored.
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param NoInheritance If true, lambda and block capture state of \p CGF
  /// is cleared for the duration of the region and restored on destruction.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Stash lambda/block capture state so the inlined region does not see
      // captures from the enclosing function.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
449 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  /// Note: deliberately shares the value of OMP_IDENT_BARRIER_IMPL, matching
  /// kmp.h.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
478 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device IDs with special meaning to the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
504 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
545 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// These are individual high bits (1 << 29, 1 << 30), distinct from the
  /// sequential base schedule values above.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
577 
578 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
579 /// region.
580 class CleanupTy final : public EHScopeStack::Cleanup {
581   PrePostActionTy *Action;
582 
583 public:
584   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
585   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
586     if (!CGF.HaveInsertPoint())
587       return;
588     Action->Exit(CGF);
589   }
590 };
591 
592 } // anonymous namespace
593 
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  // Run the codegen callback inside its own cleanup scope. When a pre/post
  // action is attached, register its Exit hook as a normal-and-EH cleanup
  // first, so it runs however the region is left.
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    // No action supplied: pass a default-constructed placeholder.
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
604 
605 /// Check if the combiner is a call to UDR combiner and if it is so return the
606 /// UDR decl used for reduction.
607 static const OMPDeclareReductionDecl *
608 getReductionInit(const Expr *ReductionOp) {
609   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
610     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
611       if (const auto *DRE =
612               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
613         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
614           return DRD;
615   return nullptr;
616 }
617 
/// Emit initialization of a reduction's private copy.
/// If the UDR \p DRD carries an initializer, emit the initializer call
/// \p InitOp with its LHS bound to \p Private and its RHS bound to
/// \p Original. Otherwise, initialize \p Private from a zero-initialized
/// private global of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // User-defined initializer: rebind the decls referenced by the init
    // expression's arguments to the private/original addresses, then emit
    // the call with the opaque callee mapped to the emitted function.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Reduction.second is used as the callee here (presumably the emitted
    // initializer function; see getUserDefinedReduction).
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No user-defined initializer: materialize a private constant null value
    // of type \p Ty and copy it into the private address.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are stored straight into the private address; no RValue
      // round-trip is needed.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
673 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, initialize each element via
/// emitInitWithReductionInitializer; otherwise emit \p Init directly.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction decl, if any; when present a parallel walk
/// over \p SrcAddr is emitted.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs carry the current source/destination element pointer across
  // iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name says "dest.element" but this GEP
    // advances the *source* element pointer; cosmetic only.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
762 
763 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
764   return CGF.EmitOMPSharedLValue(E);
765 }
766 
767 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
768                                             const Expr *E) {
769   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
770     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
771   return LValue();
772 }
773 
774 void ReductionCodeGen::emitAggregateInitialization(
775     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
776     const OMPDeclareReductionDecl *DRD) {
777   // Emit VarDecl with copy init for arrays.
778   // Get the address of the original variable captured in current
779   // captured region.
780   const auto *PrivateVD =
781       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
782   bool EmitDeclareReductionInit =
783       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
784   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
785                        EmitDeclareReductionInit,
786                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
787                                                 : PrivateVD->getInit(),
788                        DRD, SharedLVal.getAddress(CGF));
789 }
790 
791 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
792                                    ArrayRef<const Expr *> Origs,
793                                    ArrayRef<const Expr *> Privates,
794                                    ArrayRef<const Expr *> ReductionOps) {
795   ClausesData.reserve(Shareds.size());
796   SharedAddresses.reserve(Shareds.size());
797   Sizes.reserve(Shareds.size());
798   BaseDecls.reserve(Shareds.size());
799   const auto *IOrig = Origs.begin();
800   const auto *IPriv = Privates.begin();
801   const auto *IRed = ReductionOps.begin();
802   for (const Expr *Ref : Shareds) {
803     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
804     std::advance(IOrig, 1);
805     std::advance(IPriv, 1);
806     std::advance(IRed, 1);
807   }
808 }
809 
810 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
811   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
812          "Number of generated lvalues must be exactly N.");
813   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
814   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
815   SharedAddresses.emplace_back(First, Second);
816   if (ClausesData[N].Shared == ClausesData[N].Ref) {
817     OrigAddresses.emplace_back(First, Second);
818   } else {
819     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
820     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
821     OrigAddresses.emplace_back(First, Second);
822   }
823 }
824 
/// Computes the (size-in-chars, size-in-elements) pair for reduction item
/// \p N and records it in Sizes. For variably modified private types the
/// element count is also bound to the VLA size expression so the type can
/// be emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: byte size is known; no element count needed
    // (second member stays null).
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1; byte size derived from it.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variably modified type: take its byte size and divide by the
    // element size to recover the element count.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count while the
  // variably modified private type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
861 
862 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
863                                          llvm::Value *Size) {
864   const auto *PrivateVD =
865       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
866   QualType PrivateType = PrivateVD->getType();
867   if (!PrivateType->isVariablyModifiedType()) {
868     assert(!Size && !Sizes[N].second &&
869            "Size should be nullptr for non-variably modified reduction "
870            "items.");
871     return;
872   }
873   CodeGenFunction::OpaqueValueMapping OpaqueMap(
874       CGF,
875       cast<OpaqueValueExpr>(
876           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
877       RValue::get(Size));
878   CGF.EmitVariablyModifiedType(PrivateType);
879 }
880 
/// Emits the initializer for the private copy of reduction item \p N,
/// choosing between aggregate initialization, a user-defined reduction
/// initializer, and the private variable's own initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Make both addresses agree with the memory representation of their types.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array type: element-wise initialization.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a user-defined reduction initializer.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
914 
915 bool ReductionCodeGen::needCleanups(unsigned N) {
916   const auto *PrivateVD =
917       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
918   QualType PrivateType = PrivateVD->getType();
919   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
920   return DTorKind != QualType::DK_none;
921 }
922 
923 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
924                                     Address PrivateAddr) {
925   const auto *PrivateVD =
926       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
927   QualType PrivateType = PrivateVD->getType();
928   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
929   if (needCleanups(N)) {
930     PrivateAddr = CGF.Builder.CreateElementBitCast(
931         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
932     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
933   }
934 }
935 
/// Dereferences pointers/references in \p BaseLV until the pointee type
/// matches \p ElTy, then returns the resulting lvalue with its address cast
/// to \p ElTy's memory representation (base/TBAA info preserved).
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Load through one level of indirection per iteration.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Reinterpret the final address as ElTy.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
955 
/// Rebuilds the pointer/reference indirection structure of \p BaseTy around
/// \p Addr (the address of an \p ElTy object) using stack temporaries, so
/// the result can be used where an object of type \p BaseTy is expected.
/// Returns the outermost temporary, or \p Addr itself if \p BaseTy involves
/// no extra indirection.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // innermost temporary created so far
  Address TopTmp = Address::invalid();     // previous (outer) temporary
  Address MostTopTmp = Address::invalid(); // outermost temporary (the result)
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One temporary per indirection level; each outer temporary stores the
    // address of the next inner one.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the actual address into the innermost temporary and hand back
    // the outermost one.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
983 
984 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
985   const VarDecl *OrigVD = nullptr;
986   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
987     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
988     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
989       Base = TempOASE->getBase()->IgnoreParenImpCasts();
990     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
991       Base = TempASE->getBase()->IgnoreParenImpCasts();
992     DE = cast<DeclRefExpr>(Base);
993     OrigVD = cast<VarDecl>(DE->getDecl());
994   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
995     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
996     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
997       Base = TempASE->getBase()->IgnoreParenImpCasts();
998     DE = cast<DeclRefExpr>(Base);
999     OrigVD = cast<VarDecl>(DE->getDecl());
1000   }
1001   return OrigVD;
1002 }
1003 
/// Adjusts \p PrivateAddr for reduction items expressed as array sections or
/// subscripts: offsets the private copy by the same pointer distance that
/// separates the shared element from its base declaration.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    // Pointer distance between the base variable and the shared element.
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    // Apply the same adjustment to the private copy.
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    // Wrap the adjusted pointer in the indirection structure of the base
    // declaration's type.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no adjustment required.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1029 
1030 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1031   const OMPDeclareReductionDecl *DRD =
1032       getReductionInit(ClausesData[N].ReductionOp);
1033   return DRD && DRD->getInitializer();
1034 }
1035 
1036 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1037   return CGF.EmitLoadOfPointerLValue(
1038       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1039       getThreadIDVariable()->getType()->castAs<PointerType>());
1040 }
1041 
/// Emits the body statement \p S of an OpenMP region inside a terminate
/// scope, so that the structured-block entry/exit guarantees hold.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // A terminate scope enforces the latter: exceptions escaping the region
  // body hit the terminate handler instead of unwinding out.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1056 
1057 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1058     CodeGenFunction &CGF) {
1059   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1060                             getThreadIDVariable()->getType(),
1061                             AlignmentSource::Decl);
1062 }
1063 
1064 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1065                                        QualType FieldTy) {
1066   auto *Field = FieldDecl::Create(
1067       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1068       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1069       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1070   Field->setAccess(AS_public);
1071   DC->addDecl(Field);
1072   return Field;
1073 }
1074 
/// Constructs the OpenMP runtime helper: sets up the name separators, the
/// OpenMPIRBuilder, and loads any offloading metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // Backing type for kmp critical names: [8 x i32].
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1085 
1086 void CGOpenMPRuntime::clear() {
1087   InternalVars.clear();
1088   // Clean non-target variable declarations possibly used only in debug info.
1089   for (const auto &Data : EmittedNonTargetVariables) {
1090     if (!Data.getValue().pointsToAliveValue())
1091       continue;
1092     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1093     if (!GV)
1094       continue;
1095     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1096       continue;
1097     GV->eraseFromParent();
1098   }
1099 }
1100 
1101 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1102   SmallString<128> Buffer;
1103   llvm::raw_svector_ostream OS(Buffer);
1104   StringRef Sep = FirstSeparator;
1105   for (StringRef Part : Parts) {
1106     OS << Sep << Part;
1107     Sep = Separator;
1108   }
1109   return std::string(OS.str());
1110 }
1111 
/// Emits the outlined combiner or initializer function for a user-defined
/// reduction: a `void fn(Ty *omp_out/orig, Ty *omp_in/priv)` that evaluates
/// \p CombinerInitializer with \p In and \p Out privatized to the pointees
/// of the two parameters.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  // The two parameters never alias, hence 'restrict'.
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // With optimization, make these tiny helpers always-inline.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers without a call-style init expression, run the 'priv'
  // variable's own non-trivial initializer into the out parameter.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1168 
1169 void CGOpenMPRuntime::emitUserDefinedReduction(
1170     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1171   if (UDRMap.count(D) > 0)
1172     return;
1173   llvm::Function *Combiner = emitCombinerOrInitializer(
1174       CGM, D->getType(), D->getCombiner(),
1175       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1176       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1177       /*IsCombiner=*/true);
1178   llvm::Function *Initializer = nullptr;
1179   if (const Expr *Init = D->getInitializer()) {
1180     Initializer = emitCombinerOrInitializer(
1181         CGM, D->getType(),
1182         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1183                                                                      : nullptr,
1184         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1185         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1186         /*IsCombiner=*/false);
1187   }
1188   UDRMap.try_emplace(D, Combiner, Initializer);
1189   if (CGF) {
1190     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1191     Decls.second.push_back(D);
1192   }
1193 }
1194 
1195 std::pair<llvm::Function *, llvm::Function *>
1196 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1197   auto I = UDRMap.find(D);
1198   if (I != UDRMap.end())
1199     return I->second;
1200   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1201   return UDRMap.lookup(D);
1202 }
1203 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  /// Pushes a finalization callback for directive \p Kind onto \p OMPBuilder
  /// (no-op when \p OMPBuilder is null). The callback routes cancellation
  /// through Clang's cleanup machinery to the 'parallel' cancel destination.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  /// Pops the finalization callback pushed by the constructor, if any.
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder; // Non-owning; may be null.
};
} // namespace
1248 
1249 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1250     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1251     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1252     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1253   assert(ThreadIDVar->getType()->isPointerType() &&
1254          "thread id variable must be of type kmp_int32 *");
1255   CodeGenFunction CGF(CGM, true);
1256   bool HasCancel = false;
1257   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1258     HasCancel = OPD->hasCancel();
1259   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1260     HasCancel = OPD->hasCancel();
1261   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1262     HasCancel = OPSD->hasCancel();
1263   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1264     HasCancel = OPFD->hasCancel();
1265   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1266     HasCancel = OPFD->hasCancel();
1267   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1268     HasCancel = OPFD->hasCancel();
1269   else if (const auto *OPFD =
1270                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1271     HasCancel = OPFD->hasCancel();
1272   else if (const auto *OPFD =
1273                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1274     HasCancel = OPFD->hasCancel();
1275 
1276   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1277   //       parallel region to make cancellation barriers work properly.
1278   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1279   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1280   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1281                                     HasCancel, OutlinedHelperName);
1282   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1283   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1284 }
1285 
1286 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1287     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1288     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1289   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1290   return emitParallelOrTeamsOutlinedFunction(
1291       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1292 }
1293 
1294 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1295     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1296     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1297   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1298   return emitParallelOrTeamsOutlinedFunction(
1299       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1300 }
1301 
/// Outlines the body of a task/taskloop directive. For untied tasks, sets up
/// a callback that calls __kmpc_omp_task with the task descriptor
/// (presumably to re-schedule the remaining parts — see UntiedTaskActionTy)
/// and reports the number of body parts via \p NumberOfParts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Callback used for untied tasks: emit a call to
  // __kmpc_omp_task(loc, tid, task_t*).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Task and taskloop directives keep their bodies in different captured
  // statements.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Determine whether this task-like directive carries a 'cancel'.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // Only untied tasks split their body into multiple parts.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1348 
1349 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1350                              const RecordDecl *RD, const CGRecordLayout &RL,
1351                              ArrayRef<llvm::Constant *> Data) {
1352   llvm::StructType *StructTy = RL.getLLVMType();
1353   unsigned PrevIdx = 0;
1354   ConstantInitBuilder CIBuilder(CGM);
1355   auto DI = Data.begin();
1356   for (const FieldDecl *FD : RD->fields()) {
1357     unsigned Idx = RL.getLLVMFieldNo(FD);
1358     // Fill the alignment.
1359     for (unsigned I = PrevIdx; I < Idx; ++I)
1360       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1361     PrevIdx = Idx + 1;
1362     Fields.add(*DI);
1363     ++DI;
1364   }
1365 }
1366 
/// Creates a global variable of record type \p Ty initialized field-by-field
/// from \p Data (with null padding as needed); extra arguments are forwarded
/// to ConstantStructBuilder::finishAndCreateGlobal.
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}
1381 
1382 template <typename T>
1383 static void
1384 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1385                                          ArrayRef<llvm::Constant *> Data,
1386                                          T &Parent) {
1387   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1388   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1389   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1390   buildStructValue(Fields, CGM, RD, RL, Data);
1391   Fields.finishAndAddTo(Parent);
1392 }
1393 
/// Creates the "service" insertion-point marker for \p CGF's function — a
/// no-op i32 bitcast of undef used as a stable place to insert code later
/// (see users of ServiceInsertPt). Placed at the current insertion point or
/// right after the allocas.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    // Append the marker to the block currently being emitted.
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    // Otherwise place it directly after the function's alloca anchor.
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1409 
1410 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1411   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1412   if (Elem.second.ServiceInsertPt) {
1413     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1414     Elem.second.ServiceInsertPt = nullptr;
1415     Ptr->eraseFromParent();
1416   }
1417 }
1418 
1419 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1420                                                   SourceLocation Loc,
1421                                                   SmallString<128> &Buffer) {
1422   llvm::raw_svector_ostream OS(Buffer);
1423   // Build debug location
1424   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1425   OS << ";" << PLoc.getFilename() << ";";
1426   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1427     OS << FD->getQualifiedNameAsString();
1428   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1429   return OS.str();
1430 }
1431 
1432 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1433                                                  SourceLocation Loc,
1434                                                  unsigned Flags) {
1435   llvm::Constant *SrcLocStr;
1436   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1437       Loc.isInvalid()) {
1438     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1439   } else {
1440     std::string FunctionName = "";
1441     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1442       FunctionName = FD->getQualifiedNameAsString();
1443     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1444     const char *FileName = PLoc.getFilename();
1445     unsigned Line = PLoc.getLine();
1446     unsigned Column = PLoc.getColumn();
1447     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1448                                                 Line, Column);
1449   }
1450   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1451   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1452                                      Reserved2Flags);
1453 }
1454 
// Return the OpenMP global thread id for the current function, loading it
// from an outlined region's thread-id argument when that is safe, otherwise
// emitting (and caching) a call to __kmpc_global_thread_num.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only load the argument when either no C++ EH landing pads are
      // required, or the pointer is visible from the entry block / the
      // current block; otherwise fall through to the runtime call below.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insert point so the cached value dominates
  // all later uses; the guard restores the previous insertion point.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1522 
1523 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1524   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1525   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1526     clearLocThreadIdInsertPt(CGF);
1527     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1528   }
1529   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1530     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1531       UDRMap.erase(D);
1532     FunctionUDRMap.erase(CGF.CurFn);
1533   }
1534   auto I = FunctionUDMMap.find(CGF.CurFn);
1535   if (I != FunctionUDMMap.end()) {
1536     for(const auto *D : I->second)
1537       UDMMap.erase(D);
1538     FunctionUDMMap.erase(I);
1539   }
1540   LastprivateConditionalToTypes.erase(CGF.CurFn);
1541   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1542 }
1543 
// Return the pointer type for the runtime's ident_t struct, as cached by the
// OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1547 
1548 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1549   if (!Kmpc_MicroTy) {
1550     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1551     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1552                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1553     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1554   }
1555   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1556 }
1557 
1558 llvm::FunctionCallee
1559 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1560   assert((IVSize == 32 || IVSize == 64) &&
1561          "IV size is not compatible with the omp runtime");
1562   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1563                                             : "__kmpc_for_static_init_4u")
1564                                 : (IVSigned ? "__kmpc_for_static_init_8"
1565                                             : "__kmpc_for_static_init_8u");
1566   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1567   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1568   llvm::Type *TypeParams[] = {
1569     getIdentTyPointerTy(),                     // loc
1570     CGM.Int32Ty,                               // tid
1571     CGM.Int32Ty,                               // schedtype
1572     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1573     PtrTy,                                     // p_lower
1574     PtrTy,                                     // p_upper
1575     PtrTy,                                     // p_stride
1576     ITy,                                       // incr
1577     ITy                                        // chunk
1578   };
1579   auto *FnTy =
1580       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1581   return CGM.CreateRuntimeFunction(FnTy, Name);
1582 }
1583 
1584 llvm::FunctionCallee
1585 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1586   assert((IVSize == 32 || IVSize == 64) &&
1587          "IV size is not compatible with the omp runtime");
1588   StringRef Name =
1589       IVSize == 32
1590           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1591           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1592   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1593   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1594                                CGM.Int32Ty,           // tid
1595                                CGM.Int32Ty,           // schedtype
1596                                ITy,                   // lower
1597                                ITy,                   // upper
1598                                ITy,                   // stride
1599                                ITy                    // chunk
1600   };
1601   auto *FnTy =
1602       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1603   return CGM.CreateRuntimeFunction(FnTy, Name);
1604 }
1605 
1606 llvm::FunctionCallee
1607 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1608   assert((IVSize == 32 || IVSize == 64) &&
1609          "IV size is not compatible with the omp runtime");
1610   StringRef Name =
1611       IVSize == 32
1612           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1613           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1614   llvm::Type *TypeParams[] = {
1615       getIdentTyPointerTy(), // loc
1616       CGM.Int32Ty,           // tid
1617   };
1618   auto *FnTy =
1619       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1620   return CGM.CreateRuntimeFunction(FnTy, Name);
1621 }
1622 
1623 llvm::FunctionCallee
1624 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1625   assert((IVSize == 32 || IVSize == 64) &&
1626          "IV size is not compatible with the omp runtime");
1627   StringRef Name =
1628       IVSize == 32
1629           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1630           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1631   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1632   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1633   llvm::Type *TypeParams[] = {
1634     getIdentTyPointerTy(),                     // loc
1635     CGM.Int32Ty,                               // tid
1636     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1637     PtrTy,                                     // p_lower
1638     PtrTy,                                     // p_upper
1639     PtrTy                                      // p_stride
1640   };
1641   auto *FnTy =
1642       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1643   return CGM.CreateRuntimeFunction(FnTy, Name);
1644 }
1645 
1646 /// Obtain information that uniquely identifies a target entry. This
1647 /// consists of the file and device IDs as well as line number associated with
1648 /// the relevant entry source location.
1649 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1650                                      unsigned &DeviceID, unsigned &FileID,
1651                                      unsigned &LineNum) {
1652   SourceManager &SM = C.getSourceManager();
1653 
1654   // The loc should be always valid and have a file ID (the user cannot use
1655   // #pragma directives in macros)
1656 
1657   assert(Loc.isValid() && "Source location is expected to be always valid.");
1658 
1659   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1660   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1661 
1662   llvm::sys::fs::UniqueID ID;
1663   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1664     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1665     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1666     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1667       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1668           << PLoc.getFilename() << EC.message();
1669   }
1670 
1671   DeviceID = ID.getDevice();
1672   FileID = ID.getFile();
1673   LineNum = PLoc.getLine();
1674 }
1675 
// Return the address through which a declare-target variable must be
// accessed, creating (lazily) a "_decl_tgt_ref_ptr" indirection global when
// required; otherwise return an invalid Address.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // No device code is generated under -fopenmp-simd.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Indirection is used for 'link' variables, and for 'to' variables when
  // unified shared memory is required.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      // Internal-linkage variables get the file ID appended so the pointer
      // name is unique across translation units.
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // First use: create the indirection pointer global.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // Only the host side is initialized with the variable's address; on the
      // device no initializer is emitted here.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1714 
1715 llvm::Constant *
1716 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1717   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1718          !CGM.getContext().getTargetInfo().isTLSSupported());
1719   // Lookup the entry, lazily creating it if necessary.
1720   std::string Suffix = getName({"cache", ""});
1721   return getOrCreateInternalVariable(
1722       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1723 }
1724 
1725 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1726                                                 const VarDecl *VD,
1727                                                 Address VDAddr,
1728                                                 SourceLocation Loc) {
1729   if (CGM.getLangOpts().OpenMPUseTLS &&
1730       CGM.getContext().getTargetInfo().isTLSSupported())
1731     return VDAddr;
1732 
1733   llvm::Type *VarTy = VDAddr.getElementType();
1734   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1735                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1736                                                        CGM.Int8PtrTy),
1737                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1738                          getOrCreateThreadPrivateCache(VD)};
1739   return Address(CGF.EmitRuntimeCall(
1740                      OMPBuilder.getOrCreateRuntimeFunction(
1741                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1742                      Args),
1743                  VDAddr.getAlignment());
1744 }
1745 
1746 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1747     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1748     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1749   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1750   // library.
1751   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1752   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1753                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1754                       OMPLoc);
1755   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1756   // to register constructor/destructor for variable.
1757   llvm::Value *Args[] = {
1758       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1759       Ctor, CopyCtor, Dtor};
1760   CGF.EmitRuntimeCall(
1761       OMPBuilder.getOrCreateRuntimeFunction(
1762           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1763       Args);
1764 }
1765 
// Emit the per-thread constructor and/or destructor for a threadprivate
// variable (when the language/semantics require them) and register them with
// the runtime via emitThreadPrivateVarInit. If \p CGF is null, a dedicated
// global initializer function is created and returned; otherwise the
// registration is emitted into \p CGF and nullptr is returned. Also returns
// nullptr when TLS is used or no ctor/dtor is needed.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With native TLS no runtime registration is required.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit at most once per mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single void* parameter: the address of this thread's copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the same address it was handed.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single void* parameter: the address of this thread's copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // No ctor required; pass a correctly typed null pointer to the runtime.
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      // No dtor required; pass a correctly typed null pointer to the runtime.
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: create a standalone global initializer that
      // performs the registration, and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1885 
// Emit the ctor/dtor offload entries for a declare-target variable. On the
// device real init/cleanup functions are generated; on the host only dummy
// byte globals are created so that matching offload entries can be
// registered. Returns CGM.getLangOpts().OpenMPIsDevice on every path except
// when there is no offloading at all (no target triples and not a device
// compile), where it returns false.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables and 'to' variables under unified shared memory are
  // handled via the indirection pointer instead (see
  // getAddrOfDeclareTargetVar); nothing to emit here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit at most once per mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive even if nothing in the module references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: a dummy byte global stands in for the ctor so the offload
      // entry below still has an address/ID to refer to.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive even if nothing in the module references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: dummy byte global standing in for the dtor (see ctor case).
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2000 
// Return the address of an "artificial" threadprivate variable: a
// compiler-generated internal global, identified by \p Name and \p VarType,
// that needs a distinct copy per thread.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // If TLS is available, just mark the global thread-local and return it.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise obtain this thread's copy through
  // __kmpc_threadprivate_cached(loc, tid, &var, size, &cache), with a
  // dedicated cache global per variable.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // Cast the runtime's result back to a pointer to the variable's type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2031 
2032 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2033                                    const RegionCodeGenTy &ThenGen,
2034                                    const RegionCodeGenTy &ElseGen) {
2035   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2036 
2037   // If the condition constant folds and can be elided, try to avoid emitting
2038   // the condition and the dead arm of the if/else.
2039   bool CondConstant;
2040   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2041     if (CondConstant)
2042       ThenGen(CGF);
2043     else
2044       ElseGen(CGF);
2045     return;
2046   }
2047 
2048   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2049   // emit the conditional branch.
2050   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2051   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2052   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2053   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2054 
2055   // Emit the 'then' code.
2056   CGF.EmitBlock(ThenBlock);
2057   ThenGen(CGF);
2058   CGF.EmitBranch(ContBlock);
2059   // Emit the 'else' code if present.
2060   // There is no need to emit line number for unconditional branch.
2061   (void)ApplyDebugLocation::CreateEmpty(CGF);
2062   CGF.EmitBlock(ElseBlock);
2063   ElseGen(CGF);
2064   // There is no need to emit line number for unconditional branch.
2065   (void)ApplyDebugLocation::CreateEmpty(CGF);
2066   CGF.EmitBranch(ContBlock);
2067   // Emit the continuation block for code after the if.
2068   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2069 }
2070 
/// Emit code for an OpenMP 'parallel' region. Without an 'if' clause (or when
/// it holds), the region is started with __kmpc_fork_call handing the runtime
/// the outlined function. On the 'if' clause's false branch the region is
/// serialized: the outlined function is called directly between
/// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
///
/// \param OutlinedFn Outlined function containing the parallel region body.
/// \param CapturedVars Captured values, appended to the runtime/direct call.
/// \param IfCond 'if' clause condition, or nullptr when there is none (then
///        only the forked path is emitted).
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Forked path: the runtime spawns the team and invokes OutlinedFn.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: run OutlinedFn on the current thread between the
  // (end_)serialized_parallel runtime calls.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones calles in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With an 'if' clause, branch between the two paths; otherwise always fork.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2140 
2141 // If we're inside an (outlined) parallel region, use the region info's
2142 // thread-ID variable (it is passed in a first argument of the outlined function
2143 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2144 // regular serial code region, get thread ID by calling kmp_int32
2145 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2146 // return the address of that temp.
2147 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2148                                              SourceLocation Loc) {
2149   if (auto *OMPRegionInfo =
2150           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2151     if (OMPRegionInfo->getThreadIDVariable())
2152       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2153 
2154   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2155   QualType Int32Ty =
2156       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2157   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2158   CGF.EmitStoreOfScalar(ThreadID,
2159                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2160 
2161   return ThreadIDTemp;
2162 }
2163 
2164 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2165     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2166   SmallString<256> Buffer;
2167   llvm::raw_svector_ostream Out(Buffer);
2168   Out << Name;
2169   StringRef RuntimeName = Out.str();
2170   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2171   if (Elem.second) {
2172     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2173            "OMP internal variable has different type than requested");
2174     return &*Elem.second;
2175   }
2176 
2177   return Elem.second = new llvm::GlobalVariable(
2178              CGM.getModule(), Ty, /*IsConstant*/ false,
2179              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2180              Elem.first(), /*InsertBefore=*/nullptr,
2181              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2182 }
2183 
2184 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2185   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2186   std::string Name = getName({Prefix, "var"});
2187   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2188 }
2189 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Emits EnterCallee(EnterArgs) before the region body and
/// ExitCallee(ExitArgs) after it. When \c Conditional is set, the body is
/// guarded by the enter call's return value (as for __kmpc_master or
/// __kmpc_single), and the caller must invoke Done() after emitting the
/// region to terminate the guarded block.
class CommonActionTy final : public PrePostActionTy {
  /// Runtime function invoked on entry (e.g. __kmpc_critical).
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  /// Runtime function invoked on exit (e.g. __kmpc_end_critical).
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  /// Whether the body is only emitted when the enter call returns non-zero.
  bool Conditional;
  /// Continuation block of the conditional; set by Enter(), closed by Done().
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  /// Close the conditional region opened by Enter(). Only valid when the
  /// action was constructed with Conditional == true (ContBlock is set).
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2228 
2229 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2230                                          StringRef CriticalName,
2231                                          const RegionCodeGenTy &CriticalOpGen,
2232                                          SourceLocation Loc, const Expr *Hint) {
2233   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2234   // CriticalOpGen();
2235   // __kmpc_end_critical(ident_t *, gtid, Lock);
2236   // Prepare arguments and build a call to __kmpc_critical
2237   if (!CGF.HaveInsertPoint())
2238     return;
2239   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2240                          getCriticalRegionLock(CriticalName)};
2241   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2242                                                 std::end(Args));
2243   if (Hint) {
2244     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2245         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2246   }
2247   CommonActionTy Action(
2248       OMPBuilder.getOrCreateRuntimeFunction(
2249           CGM.getModule(),
2250           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2251       EnterArgs,
2252       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2253                                             OMPRTL___kmpc_end_critical),
2254       Args);
2255   CriticalOpGen.setAction(Action);
2256   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2257 }
2258 
2259 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2260                                        const RegionCodeGenTy &MasterOpGen,
2261                                        SourceLocation Loc) {
2262   if (!CGF.HaveInsertPoint())
2263     return;
2264   // if(__kmpc_master(ident_t *, gtid)) {
2265   //   MasterOpGen();
2266   //   __kmpc_end_master(ident_t *, gtid);
2267   // }
2268   // Prepare arguments and build a call to __kmpc_master
2269   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2270   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2271                             CGM.getModule(), OMPRTL___kmpc_master),
2272                         Args,
2273                         OMPBuilder.getOrCreateRuntimeFunction(
2274                             CGM.getModule(), OMPRTL___kmpc_end_master),
2275                         Args,
2276                         /*Conditional=*/true);
2277   MasterOpGen.setAction(Action);
2278   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2279   Action.Done(CGF);
2280 }
2281 
2282 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2283                                        const RegionCodeGenTy &MaskedOpGen,
2284                                        SourceLocation Loc, const Expr *Filter) {
2285   if (!CGF.HaveInsertPoint())
2286     return;
2287   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2288   //   MaskedOpGen();
2289   //   __kmpc_end_masked(iden_t *, gtid);
2290   // }
2291   // Prepare arguments and build a call to __kmpc_masked
2292   llvm::Value *FilterVal = Filter
2293                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2294                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2295   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2296                          FilterVal};
2297   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2298                             getThreadID(CGF, Loc)};
2299   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2300                             CGM.getModule(), OMPRTL___kmpc_masked),
2301                         Args,
2302                         OMPBuilder.getOrCreateRuntimeFunction(
2303                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2304                         ArgsEnd,
2305                         /*Conditional=*/true);
2306   MaskedOpGen.setAction(Action);
2307   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2308   Action.Done(CGF);
2309 }
2310 
2311 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2312                                         SourceLocation Loc) {
2313   if (!CGF.HaveInsertPoint())
2314     return;
2315   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2316     OMPBuilder.createTaskyield(CGF.Builder);
2317   } else {
2318     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2319     llvm::Value *Args[] = {
2320         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2321         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2322     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2323                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2324                         Args);
2325   }
2326 
2327   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2328     Region->emitUntiedSwitch(CGF);
2329 }
2330 
2331 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2332                                           const RegionCodeGenTy &TaskgroupOpGen,
2333                                           SourceLocation Loc) {
2334   if (!CGF.HaveInsertPoint())
2335     return;
2336   // __kmpc_taskgroup(ident_t *, gtid);
2337   // TaskgroupOpGen();
2338   // __kmpc_end_taskgroup(ident_t *, gtid);
2339   // Prepare arguments and build a call to __kmpc_taskgroup
2340   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2341   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2342                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2343                         Args,
2344                         OMPBuilder.getOrCreateRuntimeFunction(
2345                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2346                         Args);
2347   TaskgroupOpGen.setAction(Action);
2348   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2349 }
2350 
2351 /// Given an array of pointers to variables, project the address of a
2352 /// given variable.
2353 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2354                                       unsigned Index, const VarDecl *Var) {
2355   // Pull out the pointer to the variable.
2356   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2357   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2358 
2359   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2360   Addr = CGF.Builder.CreateElementBitCast(
2361       Addr, CGF.ConvertTypeForMem(Var->getType()));
2362   return Addr;
2363 }
2364 
/// Emit the helper function the runtime calls from __kmpc_copyprivate to
/// broadcast the 'single' thread's copyprivate values to the other threads.
///
/// The generated function has the shape
///   void copy_func(void *LHSArg, void *RHSArg);
/// where both arguments are arrays of void* (one slot per copyprivate
/// variable); each pair of slots is copied via the corresponding
/// AssignmentOps expression.
///
/// NOTE(review): the call site in emitSingleRegion passes (SrcExprs, DstExprs)
/// into the (DestExprs, SrcExprs) parameters; the pairing is consumed by
/// EmitOMPCopy below — confirm against EmitOMPCopy's contract before reordering.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // Build the function body with a fresh CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Reinterpret the opaque void* arguments as arrays of void*:
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // Emit one assignment per copyprivate variable:
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2418 
/// Emit an OpenMP 'single' region, including the copyprivate broadcast when
/// the directive carries copyprivate clauses.
///
/// \param CopyprivateVars Variables named in copyprivate clauses.
/// \param SrcExprs/DstExprs/AssignmentOps Per-variable pseudo source/dest
///        expressions and the assignment expression connecting them; all three
///        arrays are parallel to CopyprivateVars.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // Overall emitted shape:
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // did_it flags which thread executed the region; every thread passes it to
  // __kmpc_copyprivate so the runtime knows whose data to broadcast.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1; (still inside the conditional, i.e. only on the thread that
    // actually ran the region)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional region; everything below runs on all threads.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs/DstExprs are passed into the helper's
    // DestExprs/SrcExprs parameters, in that order — see the helper's comment.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2506 
2507 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2508                                         const RegionCodeGenTy &OrderedOpGen,
2509                                         SourceLocation Loc, bool IsThreads) {
2510   if (!CGF.HaveInsertPoint())
2511     return;
2512   // __kmpc_ordered(ident_t *, gtid);
2513   // OrderedOpGen();
2514   // __kmpc_end_ordered(ident_t *, gtid);
2515   // Prepare arguments and build a call to __kmpc_ordered
2516   if (IsThreads) {
2517     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2518     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2519                               CGM.getModule(), OMPRTL___kmpc_ordered),
2520                           Args,
2521                           OMPBuilder.getOrCreateRuntimeFunction(
2522                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2523                           Args);
2524     OrderedOpGen.setAction(Action);
2525     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2526     return;
2527   }
2528   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2529 }
2530 
2531 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2532   unsigned Flags;
2533   if (Kind == OMPD_for)
2534     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2535   else if (Kind == OMPD_sections)
2536     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2537   else if (Kind == OMPD_single)
2538     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2539   else if (Kind == OMPD_barrier)
2540     Flags = OMP_IDENT_BARRIER_EXPL;
2541   else
2542     Flags = OMP_IDENT_BARRIER_IMPL;
2543   return Flags;
2544 }
2545 
2546 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2547     CodeGenFunction &CGF, const OMPLoopDirective &S,
2548     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2549   // Check if the loop directive is actually a doacross loop directive. In this
2550   // case choose static, 1 schedule.
2551   if (llvm::any_of(
2552           S.getClausesOfKind<OMPOrderedClause>(),
2553           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2554     ScheduleKind = OMPC_SCHEDULE_static;
2555     // Chunk size is 1 in this case.
2556     llvm::APInt ChunkSize(32, 1);
2557     ChunkExpr = IntegerLiteral::Create(
2558         CGF.getContext(), ChunkSize,
2559         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2560         SourceLocation());
2561   }
2562 }
2563 
/// Emit an explicit or implicit barrier.
///
/// \param Kind Directive the barrier belongs to; selects the ident_t flags.
/// \param EmitChecks Whether to emit the cancellation-exit check after a
///        cancellable barrier.
/// \param ForceSimpleCall Force a plain __kmpc_barrier even inside a
///        cancellable region.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    // In a region that can be cancelled, use the cancellable barrier so the
    // runtime can report a pending cancellation.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Default case: a plain, non-cancellable barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2613 
2614 /// Map the OpenMP loop schedule to the runtime enumeration.
2615 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2616                                           bool Chunked, bool Ordered) {
2617   switch (ScheduleKind) {
2618   case OMPC_SCHEDULE_static:
2619     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2620                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2621   case OMPC_SCHEDULE_dynamic:
2622     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2623   case OMPC_SCHEDULE_guided:
2624     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2625   case OMPC_SCHEDULE_runtime:
2626     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2627   case OMPC_SCHEDULE_auto:
2628     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2629   case OMPC_SCHEDULE_unknown:
2630     assert(!Chunked && "chunk was specified but schedule kind not known");
2631     return Ordered ? OMP_ord_static : OMP_sch_static;
2632   }
2633   llvm_unreachable("Unexpected runtime schedule");
2634 }
2635 
2636 /// Map the OpenMP distribute schedule to the runtime enumeration.
2637 static OpenMPSchedType
2638 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2639   // only static is allowed for dist_schedule
2640   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2641 }
2642 
2643 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2644                                          bool Chunked) const {
2645   OpenMPSchedType Schedule =
2646       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2647   return Schedule == OMP_sch_static;
2648 }
2649 
2650 bool CGOpenMPRuntime::isStaticNonchunked(
2651     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2652   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2653   return Schedule == OMP_dist_sch_static;
2654 }
2655 
2656 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2657                                       bool Chunked) const {
2658   OpenMPSchedType Schedule =
2659       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2660   return Schedule == OMP_sch_static_chunked;
2661 }
2662 
2663 bool CGOpenMPRuntime::isStaticChunked(
2664     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2665   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2666   return Schedule == OMP_dist_sch_static_chunked;
2667 }
2668 
2669 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2670   OpenMPSchedType Schedule =
2671       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2672   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2673   return Schedule != OMP_sch_static;
2674 }
2675 
2676 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2677                                   OpenMPScheduleClauseModifier M1,
2678                                   OpenMPScheduleClauseModifier M2) {
2679   int Modifier = 0;
2680   switch (M1) {
2681   case OMPC_SCHEDULE_MODIFIER_monotonic:
2682     Modifier = OMP_sch_modifier_monotonic;
2683     break;
2684   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2685     Modifier = OMP_sch_modifier_nonmonotonic;
2686     break;
2687   case OMPC_SCHEDULE_MODIFIER_simd:
2688     if (Schedule == OMP_sch_static_chunked)
2689       Schedule = OMP_sch_static_balanced_chunked;
2690     break;
2691   case OMPC_SCHEDULE_MODIFIER_last:
2692   case OMPC_SCHEDULE_MODIFIER_unknown:
2693     break;
2694   }
2695   switch (M2) {
2696   case OMPC_SCHEDULE_MODIFIER_monotonic:
2697     Modifier = OMP_sch_modifier_monotonic;
2698     break;
2699   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2700     Modifier = OMP_sch_modifier_nonmonotonic;
2701     break;
2702   case OMPC_SCHEDULE_MODIFIER_simd:
2703     if (Schedule == OMP_sch_static_chunked)
2704       Schedule = OMP_sch_static_balanced_chunked;
2705     break;
2706   case OMPC_SCHEDULE_MODIFIER_last:
2707   case OMPC_SCHEDULE_MODIFIER_unknown:
2708     break;
2709   }
2710   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2711   // If the static schedule kind is specified or if the ordered clause is
2712   // specified, and if the nonmonotonic modifier is not specified, the effect is
2713   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2714   // modifier is specified, the effect is as if the nonmonotonic modifier is
2715   // specified.
2716   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2717     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2718           Schedule == OMP_sch_static_balanced_chunked ||
2719           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2720           Schedule == OMP_dist_sch_static_chunked ||
2721           Schedule == OMP_dist_sch_static))
2722       Modifier = OMP_sch_modifier_nonmonotonic;
2723   }
2724   return Schedule | Modifier;
2725 }
2726 
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  // Map the schedule clause (plus chunk presence and 'ordered') onto the
  // runtime schedule constant.
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Dispatch init must not be used for non-ordered static schedules.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
2759 
/// Emit a call to the appropriate __kmpc_for_static_init_* runtime entry
/// point, with the schedule/modifiers already lowered to runtime constants.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Static init is never used for ordered regions and only with one of the
  // static schedule kinds.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2808 
2809 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2810                                         SourceLocation Loc,
2811                                         OpenMPDirectiveKind DKind,
2812                                         const OpenMPScheduleTy &ScheduleKind,
2813                                         const StaticRTInput &Values) {
2814   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2815       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2816   assert(isOpenMPWorksharingDirective(DKind) &&
2817          "Expected loop-based or sections-based directive.");
2818   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2819                                              isOpenMPLoopDirective(DKind)
2820                                                  ? OMP_IDENT_WORK_LOOP
2821                                                  : OMP_IDENT_WORK_SECTIONS);
2822   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2823   llvm::FunctionCallee StaticInitFunction =
2824       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2825   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2826   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2827                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2828 }
2829 
2830 void CGOpenMPRuntime::emitDistributeStaticInit(
2831     CodeGenFunction &CGF, SourceLocation Loc,
2832     OpenMPDistScheduleClauseKind SchedKind,
2833     const CGOpenMPRuntime::StaticRTInput &Values) {
2834   OpenMPSchedType ScheduleNum =
2835       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2836   llvm::Value *UpdatedLocation =
2837       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2838   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2839   llvm::FunctionCallee StaticInitFunction =
2840       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2841   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2842                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2843                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2844 }
2845 
2846 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2847                                           SourceLocation Loc,
2848                                           OpenMPDirectiveKind DKind) {
2849   if (!CGF.HaveInsertPoint())
2850     return;
2851   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2852   llvm::Value *Args[] = {
2853       emitUpdateLocation(CGF, Loc,
2854                          isOpenMPDistributeDirective(DKind)
2855                              ? OMP_IDENT_WORK_DISTRIBUTE
2856                              : isOpenMPLoopDirective(DKind)
2857                                    ? OMP_IDENT_WORK_LOOP
2858                                    : OMP_IDENT_WORK_SECTIONS),
2859       getThreadID(CGF, Loc)};
2860   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2861   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2862                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2863                       Args);
2864 }
2865 
2866 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2867                                                  SourceLocation Loc,
2868                                                  unsigned IVSize,
2869                                                  bool IVSigned) {
2870   if (!CGF.HaveInsertPoint())
2871     return;
2872   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2873   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2874   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2875 }
2876 
2877 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2878                                           SourceLocation Loc, unsigned IVSize,
2879                                           bool IVSigned, Address IL,
2880                                           Address LB, Address UB,
2881                                           Address ST) {
2882   // Call __kmpc_dispatch_next(
2883   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2884   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2885   //          kmp_int[32|64] *p_stride);
2886   llvm::Value *Args[] = {
2887       emitUpdateLocation(CGF, Loc),
2888       getThreadID(CGF, Loc),
2889       IL.getPointer(), // &isLastIter
2890       LB.getPointer(), // &Lower
2891       UB.getPointer(), // &Upper
2892       ST.getPointer()  // &Stride
2893   };
2894   llvm::Value *Call =
2895       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2896   return CGF.EmitScalarConversion(
2897       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2898       CGF.getContext().BoolTy, Loc);
2899 }
2900 
2901 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2902                                            llvm::Value *NumThreads,
2903                                            SourceLocation Loc) {
2904   if (!CGF.HaveInsertPoint())
2905     return;
2906   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2907   llvm::Value *Args[] = {
2908       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2909       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2910   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2911                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2912                       Args);
2913 }
2914 
2915 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2916                                          ProcBindKind ProcBind,
2917                                          SourceLocation Loc) {
2918   if (!CGF.HaveInsertPoint())
2919     return;
2920   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2921   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2922   llvm::Value *Args[] = {
2923       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2924       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2925   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2926                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2927                       Args);
2928 }
2929 
2930 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2931                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2932   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2933     OMPBuilder.createFlush(CGF.Builder);
2934   } else {
2935     if (!CGF.HaveInsertPoint())
2936       return;
2937     // Build call void __kmpc_flush(ident_t *loc)
2938     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2939                             CGM.getModule(), OMPRTL___kmpc_flush),
2940                         emitUpdateLocation(CGF, Loc));
2941   }
2942 }
2943 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): the order of these enumerators appears to mirror the
/// runtime's kmp_task_t layout - confirm against the runtime before
/// reordering.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2969 
2970 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2971   return OffloadEntriesTargetRegion.empty() &&
2972          OffloadEntriesDeviceGlobalVar.empty();
2973 }
2974 
2975 /// Initialize target region entry.
2976 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2977     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2978                                     StringRef ParentName, unsigned LineNum,
2979                                     unsigned Order) {
2980   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2981                                              "only required for the device "
2982                                              "code generation.");
2983   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2984       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2985                                    OMPTargetRegionEntryTargetRegion);
2986   ++OffloadingEntriesNum;
2987 }
2988 
/// Register the address, ID and flags of a target region entry. On the
/// device side this completes an entry previously created by
/// initializeTargetRegionEntryInfo; on the host side it creates a new one.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      return;
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host side: ignore a duplicate registration of a plain target-region
    // entry (address/ID ignored in the lookup), but assert on any other
    // double registration.
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3018 
3019 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3020     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3021     bool IgnoreAddressId) const {
3022   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3023   if (PerDevice == OffloadEntriesTargetRegion.end())
3024     return false;
3025   auto PerFile = PerDevice->second.find(FileID);
3026   if (PerFile == PerDevice->second.end())
3027     return false;
3028   auto PerParentName = PerFile->second.find(ParentName);
3029   if (PerParentName == PerFile->second.end())
3030     return false;
3031   auto PerLine = PerParentName->second.find(LineNum);
3032   if (PerLine == PerParentName->second.end())
3033     return false;
3034   // Fail if this entry is already registered.
3035   if (!IgnoreAddressId &&
3036       (PerLine->second.getAddress() || PerLine->second.getID()))
3037     return false;
3038   return true;
3039 }
3040 
3041 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3042     const OffloadTargetRegionEntryInfoActTy &Action) {
3043   // Scan all target region entries and perform the provided action.
3044   for (const auto &D : OffloadEntriesTargetRegion)
3045     for (const auto &F : D.second)
3046       for (const auto &P : F.second)
3047         for (const auto &L : P.second)
3048           Action(D.first, F.first, P.first(), L.first, L.second);
3049 }
3050 
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // Record a placeholder entry (no address/size yet); it is completed later
  // by registerDeviceGlobalVarEntryInfo.
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3061 
3062 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3063     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3064                                      CharUnits VarSize,
3065                                      OMPTargetGlobalVarEntryKind Flags,
3066                                      llvm::GlobalValue::LinkageTypes Linkage) {
3067   if (CGM.getLangOpts().OpenMPIsDevice) {
3068     // This could happen if the device compilation is invoked standalone.
3069     if (!hasDeviceGlobalVarEntryInfo(VarName))
3070       return;
3071     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3072     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3073       if (Entry.getVarSize().isZero()) {
3074         Entry.setVarSize(VarSize);
3075         Entry.setLinkage(Linkage);
3076       }
3077       return;
3078     }
3079     Entry.setVarSize(VarSize);
3080     Entry.setLinkage(Linkage);
3081     Entry.setAddress(Addr);
3082   } else {
3083     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3084       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3085       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3086              "Entry not initialized!");
3087       if (Entry.getVarSize().isZero()) {
3088         Entry.setVarSize(VarSize);
3089         Entry.setLinkage(Linkage);
3090       }
3091       return;
3092     }
3093     OffloadEntriesDeviceGlobalVar.try_emplace(
3094         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3095     ++OffloadingEntriesNum;
3096   }
3097 }
3098 
3099 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3100     actOnDeviceGlobalVarEntriesInfo(
3101         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3102   // Scan all target region entries and perform the provided action.
3103   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3104     Action(E.getKey(), E.getValue());
3105 }
3106 
/// Materialize one __tgt_offload_entry global (plus its name string) in the
/// special section the offload linker scans.
/// NOTE(review): the Linkage parameter is currently unused in this body.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Fields of __tgt_offload_entry: addr, name, size, flags, reserved.
  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}
3137 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by their creation order, so the device side sees the
  // same ordering as the host side.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Try to recover a source location for diagnostics by matching the
        // (device, file) unique ID against files known to the source manager.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit the actual offload entry globals, diagnosing entries that were
  // initialized but never received a valid address/ID.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3311 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a module in its own context; we only need its
  // named metadata.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers reading integer/string operands of the metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 encodes the entry kind; the remaining operand layout must
    // match what createOffloadEntriesAndInfoMetadata() emits.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3380 
3381 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3382   if (!KmpRoutineEntryPtrTy) {
3383     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3384     ASTContext &C = CGM.getContext();
3385     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3386     FunctionProtoType::ExtProtoInfo EPI;
3387     KmpRoutineEntryPtrQTy = C.getPointerType(
3388         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3389     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3390   }
3391 }
3392 
3393 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3394   // Make sure the type of the entry is already created. This is the type we
3395   // have to create:
3396   // struct __tgt_offload_entry{
3397   //   void      *addr;       // Pointer to the offload entry info.
3398   //                          // (function or global)
3399   //   char      *name;       // Name of the function or global.
3400   //   size_t     size;       // Size of the entry info (0 if it a function).
3401   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3402   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3403   // };
3404   if (TgtOffloadEntryQTy.isNull()) {
3405     ASTContext &C = CGM.getContext();
3406     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3407     RD->startDefinition();
3408     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3409     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3410     addFieldToRecordDecl(C, RD, C.getSizeType());
3411     addFieldToRecordDecl(
3412         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3413     addFieldToRecordDecl(
3414         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3415     RD->completeDefinition();
3416     RD->addAttr(PackedAttr::CreateImplicit(C));
3417     TgtOffloadEntryQTy = C.getRecordType(RD);
3418   }
3419   return TgtOffloadEntryQTy;
3420 }
3421 
3422 namespace {
3423 struct PrivateHelpersTy {
3424   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3425                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3426       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3427         PrivateElemInit(PrivateElemInit) {}
3428   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3429   const Expr *OriginalRef = nullptr;
3430   const VarDecl *Original = nullptr;
3431   const VarDecl *PrivateCopy = nullptr;
3432   const VarDecl *PrivateElemInit = nullptr;
3433   bool isLocalPrivate() const {
3434     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3435   }
3436 };
3437 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3438 } // anonymous namespace
3439 
3440 static bool isAllocatableDecl(const VarDecl *VD) {
3441   const VarDecl *CVD = VD->getCanonicalDecl();
3442   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3443     return false;
3444   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3445   // Use the default allocation.
3446   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3447             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3448            !AA->getAllocator());
3449 }
3450 
3451 static RecordDecl *
3452 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3453   if (!Privates.empty()) {
3454     ASTContext &C = CGM.getContext();
3455     // Build struct .kmp_privates_t. {
3456     //         /*  private vars  */
3457     //       };
3458     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3459     RD->startDefinition();
3460     for (const auto &Pair : Privates) {
3461       const VarDecl *VD = Pair.second.Original;
3462       QualType Type = VD->getType().getNonReferenceType();
3463       // If the private variable is a local variable with lvalue ref type,
3464       // allocate the pointer instead of the pointee type.
3465       if (Pair.second.isLocalPrivate()) {
3466         if (VD->getType()->isLValueReferenceType())
3467           Type = C.getPointerType(Type);
3468         if (isAllocatableDecl(VD))
3469           Type = C.getPointerType(Type);
3470       }
3471       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3472       if (VD->hasAttrs()) {
3473         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3474              E(VD->getAttrs().end());
3475              I != E; ++I)
3476           FD->addAttr(*I);
3477       }
3478     }
3479     RD->completeDefinition();
3480     return RD;
3481   }
3482   return nullptr;
3483 }
3484 
3485 static RecordDecl *
3486 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3487                          QualType KmpInt32Ty,
3488                          QualType KmpRoutineEntryPointerQTy) {
3489   ASTContext &C = CGM.getContext();
3490   // Build struct kmp_task_t {
3491   //         void *              shareds;
3492   //         kmp_routine_entry_t routine;
3493   //         kmp_int32           part_id;
3494   //         kmp_cmplrdata_t data1;
3495   //         kmp_cmplrdata_t data2;
3496   // For taskloops additional fields:
3497   //         kmp_uint64          lb;
3498   //         kmp_uint64          ub;
3499   //         kmp_int64           st;
3500   //         kmp_int32           liter;
3501   //         void *              reductions;
3502   //       };
3503   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3504   UD->startDefinition();
3505   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3506   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3507   UD->completeDefinition();
3508   QualType KmpCmplrdataTy = C.getRecordType(UD);
3509   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3510   RD->startDefinition();
3511   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3512   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3513   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3514   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3515   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3516   if (isOpenMPTaskLoopDirective(Kind)) {
3517     QualType KmpUInt64Ty =
3518         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3519     QualType KmpInt64Ty =
3520         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3521     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3522     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3523     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3524     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3525     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3526   }
3527   RD->completeDefinition();
3528   return RD;
3529 }
3530 
3531 static RecordDecl *
3532 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3533                                      ArrayRef<PrivateDataTy> Privates) {
3534   ASTContext &C = CGM.getContext();
3535   // Build struct kmp_task_t_with_privates {
3536   //         kmp_task_t task_data;
3537   //         .kmp_privates_t. privates;
3538   //       };
3539   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3540   RD->startDefinition();
3541   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3542   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3543     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3544   RD->completeDefinition();
3545   return RD;
3546 }
3547 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Proxy signature: kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase points at the whole kmp_task_t_with_privates object; Base is its
  // leading kmp_task_t member.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address (getPointer), not by value, so the outlined
  // function can update it.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the pointer type the outlined
  // task function expects.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates member (field 1) only exists when the task has private data;
  // pass a null void* otherwise.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to regular tasks and taskloops.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass lb, ub, st, liter and reductions, loaded by
    // value from the kmp_task_t payload.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3662 
/// Emit the task destructor function: it has the same (gtid, task object)
/// parameter list as the task entry and pushes a destroy cleanup for every
/// private field whose type requires destruction.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // Signature: kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Navigate from the task argument to its privates member (the second field
  // of kmp_task_t_with_privates).
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // For each private field with a non-trivial destruction kind, register a
  // destroy cleanup; FinishFunction emits them before returning.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3711 
3712 /// Emit a privates mapping function for correct handling of private and
3713 /// firstprivate variables.
3714 /// \code
3715 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3716 /// **noalias priv1,...,  <tyn> **noalias privn) {
3717 ///   *priv1 = &.privates.priv1;
3718 ///   ...;
3719 ///   *privn = &.privates.privn;
3720 /// }
3721 /// \endcode
3722 static llvm::Value *
3723 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3724                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3725                                ArrayRef<PrivateDataTy> Privates) {
3726   ASTContext &C = CGM.getContext();
3727   FunctionArgList Args;
3728   ImplicitParamDecl TaskPrivatesArg(
3729       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3730       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3731       ImplicitParamDecl::Other);
3732   Args.push_back(&TaskPrivatesArg);
3733   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3734   unsigned Counter = 1;
3735   for (const Expr *E : Data.PrivateVars) {
3736     Args.push_back(ImplicitParamDecl::Create(
3737         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3738         C.getPointerType(C.getPointerType(E->getType()))
3739             .withConst()
3740             .withRestrict(),
3741         ImplicitParamDecl::Other));
3742     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3743     PrivateVarsPos[VD] = Counter;
3744     ++Counter;
3745   }
3746   for (const Expr *E : Data.FirstprivateVars) {
3747     Args.push_back(ImplicitParamDecl::Create(
3748         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3749         C.getPointerType(C.getPointerType(E->getType()))
3750             .withConst()
3751             .withRestrict(),
3752         ImplicitParamDecl::Other));
3753     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3754     PrivateVarsPos[VD] = Counter;
3755     ++Counter;
3756   }
3757   for (const Expr *E : Data.LastprivateVars) {
3758     Args.push_back(ImplicitParamDecl::Create(
3759         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3760         C.getPointerType(C.getPointerType(E->getType()))
3761             .withConst()
3762             .withRestrict(),
3763         ImplicitParamDecl::Other));
3764     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3765     PrivateVarsPos[VD] = Counter;
3766     ++Counter;
3767   }
3768   for (const VarDecl *VD : Data.PrivateLocals) {
3769     QualType Ty = VD->getType().getNonReferenceType();
3770     if (VD->getType()->isLValueReferenceType())
3771       Ty = C.getPointerType(Ty);
3772     if (isAllocatableDecl(VD))
3773       Ty = C.getPointerType(Ty);
3774     Args.push_back(ImplicitParamDecl::Create(
3775         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3776         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3777         ImplicitParamDecl::Other));
3778     PrivateVarsPos[VD] = Counter;
3779     ++Counter;
3780   }
3781   const auto &TaskPrivatesMapFnInfo =
3782       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3783   llvm::FunctionType *TaskPrivatesMapTy =
3784       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3785   std::string Name =
3786       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3787   auto *TaskPrivatesMap = llvm::Function::Create(
3788       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3789       &CGM.getModule());
3790   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3791                                     TaskPrivatesMapFnInfo);
3792   if (CGM.getLangOpts().Optimize) {
3793     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3794     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3795     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3796   }
3797   CodeGenFunction CGF(CGM);
3798   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3799                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3800 
3801   // *privi = &.privates.privi;
3802   LValue Base = CGF.EmitLoadOfPointerLValue(
3803       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3804       TaskPrivatesArg.getType()->castAs<PointerType>());
3805   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3806   Counter = 0;
3807   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3808     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3809     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3810     LValue RefLVal =
3811         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3812     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3813         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3814     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3815     ++Counter;
3816   }
3817   CGF.FinishFunction();
3818   return TaskPrivatesMap;
3819 }
3820 
/// Emit initialization for private variables in task-based directives.
/// Walks \p Privates in lockstep with the fields of the privates record and
/// runs each copy's initializer. \p ForDup selects the task_dup flavor, where
/// only non-trivial constructor initializations are re-executed.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    // SrcBase views the shareds block with the statically-known shareds type.
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Re-point FI at the first field inside the privates record.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the task_dup flavor (ForDup) only non-trivial constructor
    // initializations are re-run; trivial data was already copied.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: initialize the copy from the shared original, bound
        // through the element-init declaration.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // task_dup reads the shared value out of the source task; rebuild
          // the lvalue with the declaration's alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Variable captured by a lambda or block: emit through the capture.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: bind the element-init decl to the shared
          // address and evaluate the initializer once.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: just run the copy's own initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3942 
3943 /// Check if duplication function is required for taskloops.
3944 static bool checkInitIsRequired(CodeGenFunction &CGF,
3945                                 ArrayRef<PrivateDataTy> Privates) {
3946   bool InitRequired = false;
3947   for (const PrivateDataTy &Pair : Privates) {
3948     if (Pair.second.isLocalPrivate())
3949       continue;
3950     const VarDecl *VD = Pair.second.PrivateCopy;
3951     const Expr *Init = VD->getAnyInitializer();
3952     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3953                                     !CGF.isTrivialInitializer(Init));
3954     if (InitRequired)
3955       break;
3956   }
3957   return InitRequired;
3958 }
3959 
3960 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Signature: void (task *dst, task *src, int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // TDBase is the *destination* task object.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Read the shareds pointer out of the *source* task so firstprivates can
    // be copied from it; note this inner TDBase shadows the destination one.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  // Re-run the private initializations into the destination task.
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4039 
4040 /// Checks if destructor function is required to be generated.
4041 /// \return true if cleanups are required, false otherwise.
4042 static bool
4043 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4044                          ArrayRef<PrivateDataTy> Privates) {
4045   for (const PrivateDataTy &P : Privates) {
4046     if (P.second.isLocalPrivate())
4047       continue;
4048     QualType Ty = P.second.Original->getType().getNonReferenceType();
4049     if (Ty.isDestructedType())
4050       return true;
4051   }
4052   return false;
4053 }
4054 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII helper: the constructor privatizes the iterator and counter variables
/// and emits the loop headers (counter zero-init, upper-bound check, branch
/// into the loop body, and the per-iteration update of the iterator variable),
/// leaving the IR insertion point inside the innermost loop body.  The
/// destructor emits the matching counter increments, back-branches, and exit
/// blocks in reverse order.  Code emitted while the scope is alive is
/// therefore executed once per point of the (possibly multi-dimensional)
/// iterator space.  A null iterator expression makes the whole scope a no-op.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-dimension jump targets: ContDests[i] is the loop-header block of
  // iterator i (re-check of the bound), ExitDests[i] the block after its loop.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Evaluate all upper bounds up front, before the iterator variables are
    // privatized below.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      // Private storage for the iterator variable itself...
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      // ...and for its helper counter.
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Emit one loop header per iterator, outermost first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Use a signed or unsigned comparison matching the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  // Close the loops innermost-first, mirroring the construction order above.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4133 
4134 static std::pair<llvm::Value *, llvm::Value *>
4135 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4136   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4137   llvm::Value *Addr;
4138   if (OASE) {
4139     const Expr *Base = OASE->getBase();
4140     Addr = CGF.EmitScalarExpr(Base);
4141   } else {
4142     Addr = CGF.EmitLValue(E).getPointer(CGF);
4143   }
4144   llvm::Value *SizeVal;
4145   QualType Ty = E->getType();
4146   if (OASE) {
4147     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4148     for (const Expr *SE : OASE->getDimensions()) {
4149       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4150       Sz = CGF.EmitScalarConversion(
4151           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4152       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4153     }
4154   } else if (const auto *ASE =
4155                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4156     LValue UpAddrLVal =
4157         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4158     llvm::Value *UpAddr =
4159         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4160     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4161     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4162     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4163   } else {
4164     SizeVal = CGF.getTypeSize(Ty);
4165   }
4166   return std::make_pair(Addr, SizeVal);
4167 }
4168 
4169 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4170 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4171   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4172   if (KmpTaskAffinityInfoTy.isNull()) {
4173     RecordDecl *KmpAffinityInfoRD =
4174         C.buildImplicitRecord("kmp_task_affinity_info_t");
4175     KmpAffinityInfoRD->startDefinition();
4176     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4177     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4178     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4179     KmpAffinityInfoRD->completeDefinition();
4180     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4181   }
4182 }
4183 
/// Emits the task-allocation and initialization sequence for the task-based
/// directive \p D:
///  * aggregates the private/firstprivate/lastprivate/local copies and sorts
///    them by alignment to build the kmp_task_t-with-privates record,
///  * emits the proxy task entry and, if needed, the privates-map function,
///  * allocates the task object via __kmpc_omp_task_alloc (or
///    __kmpc_omp_target_task_alloc when a nowait clause is present),
///  * handles detach and affinity clauses, copies shareds, initializes the
///    private copies, and records destructor/priority data in the task.
/// Returns the handles (new task, task entry, typed task pointer, etc.) that
/// callers use to emit the actual task/taskloop runtime calls.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry an element-init variable.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Allocatable locals are stored by pointer, so use pointer alignment.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Stable sort with the largest alignment first (presumably to minimize
  // padding in the generated privates record).
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).  Taskloop directives use a
  // separately cached record type.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map function pointer is the 4th parameter of the outlined
  // task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null map pointer.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    // Destructors flag tells the runtime to invoke the destructor thunk.
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final flag may be a runtime value (select) or a compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a nowait clause, allocate via __kmpc_omp_target_task_alloc, which
  // takes an additional device ID argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    // Store the returned event handle into the user's event variable.
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Iterator modifiers contribute a runtime-computed count (product of
    // their upper bounds); plain items a compile-time count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime count: emit a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time count: a plain constant-size array suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Iterator-modified items need a runtime position counter, seeded with
    // the number of statically emitted entries.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // Cast the task pointer returned by the runtime to the typed record.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops with lastprivates or privates requiring init also need a
    // task-duplication function for the runtime.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4573 
namespace {
/// Dependence kind for RTL.
/// Bit-flag values stored into the kmp_depend_info 'flags' field
/// (see translateDependencyKind()); out/inout share the same encoding.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
/// Must match the field order built in getDependTypes(): base address,
/// length in bytes, dependence flags.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4584 
4585 /// Translates internal dependency kind into the runtime kind.
4586 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4587   RTLDependenceKindTy DepKind;
4588   switch (K) {
4589   case OMPC_DEPEND_in:
4590     DepKind = DepIn;
4591     break;
4592   // Out and InOut dependencies must use the same code.
4593   case OMPC_DEPEND_out:
4594   case OMPC_DEPEND_inout:
4595     DepKind = DepInOut;
4596     break;
4597   case OMPC_DEPEND_mutexinoutset:
4598     DepKind = DepMutexInOutSet;
4599     break;
4600   case OMPC_DEPEND_source:
4601   case OMPC_DEPEND_sink:
4602   case OMPC_DEPEND_depobj:
4603   case OMPC_DEPEND_unknown:
4604     llvm_unreachable("Unknown task dependence type");
4605   }
4606   return DepKind;
4607 }
4608 
4609 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4610 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4611                            QualType &FlagsTy) {
4612   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4613   if (KmpDependInfoTy.isNull()) {
4614     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4615     KmpDependInfoRD->startDefinition();
4616     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4617     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4618     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4619     KmpDependInfoRD->completeDefinition();
4620     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4621   }
4622 }
4623 
/// Extracts the dependence array stored in a depobj variable.
/// \p DepobjLVal holds a void* that points at an array of kmp_depend_info
/// elements; the element immediately before the array (index -1) carries the
/// number of dependencies in its base_addr field.
/// \return the loaded element count and an lvalue for the first element.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // Load the void* stored in the depobj variable.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  // Reinterpret it as a pointer to kmp_depend_info.
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element to reach the header slot in front of the array.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4652 
/// Fills consecutive entries of \p DependenciesArray with the
/// (base_addr, len, flags) triples for the dependences described by \p Data.
/// \p Pos is either a compile-time index (unsigned*) advanced in place, or a
/// runtime counter lvalue (LValue*) loaded/incremented per element when the
/// starting position is not statically known (e.g. with iterator modifiers).
/// If \p Data has an iterator expression the stores are emitted inside the
/// generated iterator loops, so they execute once per iteration point.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Opens the iterator loops (no-op when there is no iterator modifier).
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    // Address the destination slot either by constant or by runtime index.
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: statically for unsigned*, with an emitted
    // add+store for the runtime counter.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4711 
/// Returns, for every depobj expression in \p Data, the number of
/// kmp_depend_info records stored in that depobj's array.
/// A depobj variable points at the first real dependence record; the record
/// immediately before it (index -1) keeps the element count in its base_addr
/// field — this is the layout produced by emitDepobjDependClause. The counts
/// are stashed in stack temporaries while still inside the (optional)
/// iterator loop nest and re-loaded once the nest has been closed.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Emit the iterator loop nest, if the depobj list uses an iterator.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // The depobj variable itself stores a pointer to the dependence array.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // Step back one record to reach the hidden count element.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Copy the count into a zero-initialized temporary so the value is
      // still addressable after the iterator loops are exited below.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Re-load the accumulated counts outside of the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4769 
/// Copies the dependence records of every depobj listed in \p Data into
/// \p DependenciesArray via memcpy, starting at the runtime position stored
/// in \p PosLVal, and advances that position by the number of records copied.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Emit the iterator loop nest, if the depobj list uses an iterator.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // The depobj variable itself stores a pointer to the dependence array.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      // The count lives in the base_addr field of the record at index -1
      // (layout produced by emitDepobjDependClause).
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4830 
4831 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4832     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4833     SourceLocation Loc) {
4834   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4835         return D.DepExprs.empty();
4836       }))
4837     return std::make_pair(nullptr, Address::invalid());
4838   // Process list of dependencies.
4839   ASTContext &C = CGM.getContext();
4840   Address DependenciesArray = Address::invalid();
4841   llvm::Value *NumOfElements = nullptr;
4842   unsigned NumDependencies = std::accumulate(
4843       Dependencies.begin(), Dependencies.end(), 0,
4844       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4845         return D.DepKind == OMPC_DEPEND_depobj
4846                    ? V
4847                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4848       });
4849   QualType FlagsTy;
4850   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4851   bool HasDepobjDeps = false;
4852   bool HasRegularWithIterators = false;
4853   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4854   llvm::Value *NumOfRegularWithIterators =
4855       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4856   // Calculate number of depobj dependecies and regular deps with the iterators.
4857   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4858     if (D.DepKind == OMPC_DEPEND_depobj) {
4859       SmallVector<llvm::Value *, 4> Sizes =
4860           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4861       for (llvm::Value *Size : Sizes) {
4862         NumOfDepobjElements =
4863             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4864       }
4865       HasDepobjDeps = true;
4866       continue;
4867     }
4868     // Include number of iterations, if any.
4869     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4870       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4871         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4872         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4873         NumOfRegularWithIterators =
4874             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4875       }
4876       HasRegularWithIterators = true;
4877       continue;
4878     }
4879   }
4880 
4881   QualType KmpDependInfoArrayTy;
4882   if (HasDepobjDeps || HasRegularWithIterators) {
4883     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4884                                            /*isSigned=*/false);
4885     if (HasDepobjDeps) {
4886       NumOfElements =
4887           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4888     }
4889     if (HasRegularWithIterators) {
4890       NumOfElements =
4891           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4892     }
4893     OpaqueValueExpr OVE(Loc,
4894                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4895                         VK_PRValue);
4896     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4897                                                   RValue::get(NumOfElements));
4898     KmpDependInfoArrayTy =
4899         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4900                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4901     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4902     // Properly emit variable-sized array.
4903     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4904                                          ImplicitParamDecl::Other);
4905     CGF.EmitVarDecl(*PD);
4906     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4907     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4908                                               /*isSigned=*/false);
4909   } else {
4910     KmpDependInfoArrayTy = C.getConstantArrayType(
4911         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4912         ArrayType::Normal, /*IndexTypeQuals=*/0);
4913     DependenciesArray =
4914         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4915     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4916     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4917                                            /*isSigned=*/false);
4918   }
4919   unsigned Pos = 0;
4920   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4921     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4922         Dependencies[I].IteratorExpr)
4923       continue;
4924     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4925                    DependenciesArray);
4926   }
4927   // Copy regular dependecies with iterators.
4928   LValue PosLVal = CGF.MakeAddrLValue(
4929       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4930   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4931   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4932     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4933         !Dependencies[I].IteratorExpr)
4934       continue;
4935     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4936                    DependenciesArray);
4937   }
4938   // Copy final depobj arrays without iterators.
4939   if (HasDepobjDeps) {
4940     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4941       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4942         continue;
4943       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4944                          DependenciesArray);
4945     }
4946   }
4947   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4948       DependenciesArray, CGF.VoidPtrTy);
4949   return std::make_pair(NumOfElements, DependenciesArray);
4950 }
4951 
/// Emits the dependence array for an 'omp depobj' construct: allocates the
/// array dynamically via __kmpc_alloc, reserves one extra leading record
/// whose base_addr field stores the number of elements (needed by
/// depobj(x) update(...)/destroy), fills the real records with
/// emitDependData, and returns the address just past the count record.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Iterator-expanded list: the element count is the product of the
    // iterator upper bounds, computed at runtime.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the hidden count record; scale by the (aligned) record size.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Statically known count: size of kmp_depend_info[NumDependencies + 1].
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill the real records starting at index 1; iterator-expanded lists need
  // a runtime counter, otherwise a constant index suffices.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of the first real record (skip the count element).
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5034 
5035 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5036                                         SourceLocation Loc) {
5037   ASTContext &C = CGM.getContext();
5038   QualType FlagsTy;
5039   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5040   LValue Base = CGF.EmitLoadOfPointerLValue(
5041       DepobjLVal.getAddress(CGF),
5042       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5043   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5044   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5045       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5046   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5047       Addr.getPointer(),
5048       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5049   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5050                                                                CGF.VoidPtrTy);
5051   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5052   // Use default allocator.
5053   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5054   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5055 
5056   // _kmpc_free(gtid, addr, nullptr);
5057   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5058                                 CGM.getModule(), OMPRTL___kmpc_free),
5059                             Args);
5060 }
5061 
/// Emits the 'update' clause of an 'omp depobj' directive: iterates over all
/// kmp_depend_info records of the depobj's array and overwrites each
/// record's flags field with the translated \p NewDepKind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  // Fetch the element count and the start of the depobj's record array.
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the current element pointer: entry edge starts at Begin, the
  // back edge (added below) carries the advanced pointer.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  // Loop until the advanced pointer reaches the end of the array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5107 
/// Emits code for an OpenMP task-based directive: initializes the task via
/// emitTaskInit, emits its dependence array, and then either enqueues the
/// task with __kmpc_omp_task[_with_deps] or — when the 'if' clause evaluates
/// to false — executes it immediately as an undeferred task
/// (__kmpc_omp_task_begin_if0 / direct proxy call / _complete_if0).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Allocate and initialize the kmp_task_t object (privates, shareds, ...).
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'then' branch: the task is deferred — hand it to the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks restart from part 0 on each resume.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // 'else' branch ('if' clause is false): execute the task immediately,
  // bracketed by the if0 runtime calls.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    // No 'if' clause: always take the deferred path.
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5225 
/// Emits code for a 'taskloop' directive: initializes the task object via
/// emitTaskInit, stores the loop bounds, stride and reduction data into the
/// kmp_task_t, and calls __kmpc_taskloop.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // The 'if' clause is passed as an int argument; default is 1 (true).
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound field of the kmp_task_t object.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the upper bound field.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the stride field.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Values for the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // sched: selected by whether a grainsize/num_tasks clause is present.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      // grainsize: clause value, or 0 when no schedule clause was given.
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      // task_dup: duplication routine for lastprivates/firstprivates, if any.
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5311 
5312 /// Emit reduction operation for each element of array (required for
5313 /// array sections) LHS op = RHS.
5314 /// \param Type Type of array.
5315 /// \param LHSVar Variable on the left side of the reduction operation
5316 /// (references element of array in original variable).
5317 /// \param RHSVar Variable on the right side of the reduction operation
5318 /// (references element of array in original variable).
5319 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5320 /// RHSVar.
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded unchanged to
/// \p RedOpGen on every iteration (used by the atomic-reduction code path;
/// all null for plain combiners).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes track the current source/destination element; both advance in
  // lock-step, one element per iteration.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so the
  // generated combiner operates on one element pair.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end (the exit test uses the LHS pointer
  // only; both arrays have the same element count).
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5391 
5392 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5393 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5394 /// UDR combiner function.
5395 static void emitReductionCombiner(CodeGenFunction &CGF,
5396                                   const Expr *ReductionOp) {
5397   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5398     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5399       if (const auto *DRE =
5400               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5401         if (const auto *DRD =
5402                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5403           std::pair<llvm::Function *, llvm::Function *> Reduction =
5404               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5405           RValue Func = RValue::get(Reduction.first);
5406           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5407           CGF.EmitIgnoredExpr(ReductionOp);
5408           return;
5409         }
5410   CGF.EmitIgnoredExpr(ReductionOp);
5411 }
5412 
/// Emits an internal helper function
///   void .omp.reduction.reduction_func(void *LHSArg, void *RHSArg)
/// that combines each private reduction item into the corresponding shared
/// item: *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]).
/// \param ArgsType Pointer type of the RedList arrays both arguments are
/// cast to.
/// \param Privates Private copies declared for each reduction item (VLA
/// items reserve an extra RedList slot holding their size).
/// \param LHSExprs/RHSExprs DeclRefExprs for the LHS/RHS variables used
/// inside \p ReductionOps.
/// \param ReductionOps Combiner expression for each item.
/// \return The newly created internal-linkage function.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS variable to the corresponding RedList slot so the
  // combiner expressions below act on the arguments.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The size occupies the next RedList slot; bind it to the VLA's size
      // expression so the variably-modified type can be emitted.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5504 
5505 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5506                                                   const Expr *ReductionOp,
5507                                                   const Expr *PrivateRef,
5508                                                   const DeclRefExpr *LHS,
5509                                                   const DeclRefExpr *RHS) {
5510   if (PrivateRef->getType()->isArrayType()) {
5511     // Emit reduction for array section.
5512     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5513     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5514     EmitOMPAggregateReduction(
5515         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5516         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5517           emitReductionCombiner(CGF, ReductionOp);
5518         });
5519   } else {
5520     // Emit reduction for array subscript or single variable.
5521     emitReductionCombiner(CGF, ReductionOp);
5522   }
5523 }
5524 
/// Emits the full code sequence for an OpenMP 'reduction' clause: builds the
/// RedList of private items, emits reduce_func, calls
/// __kmpc_reduce{_nowait} and switches on its result (case 1: non-atomic
/// combine + __kmpc_end_reduce{_nowait}; case 2: atomic combine). When
/// Options.SimpleReduction is set, only the plain combiners are emitted with
/// no runtime calls. See the step-by-step pseudo-code comment below.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination needed: just apply each combiner in place.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // Decompose a combiner of the form 'x = <rhs>' into XExpr/UpExpr (and
      // EExpr/BO from the RHS) so it can be emitted as a simple atomic update
      // where possible; otherwise fall back to a critical region.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Update-expression fallback: store the loaded value into a
                // temporary remapped to VD, then evaluate the update.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5831 
5832 /// Generates unique name for artificial threadprivate variables.
5833 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5834 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5835                                       const Expr *Ref) {
5836   SmallString<256> Buffer;
5837   llvm::raw_svector_ostream Out(Buffer);
5838   const clang::DeclRefExpr *DE;
5839   const VarDecl *D = ::getBaseDecl(Ref, DE);
5840   if (!D)
5841     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5842   D = D->getCanonicalDecl();
5843   std::string Name = CGM.getOpenMPRuntime().getName(
5844       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5845   Out << Prefix << Name << "_"
5846       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5847   return std::string(Out.str());
5848 }
5849 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing all reduction items.
/// \param N Index of the reduction item this initializer is emitted for.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by the
  // reduction initializer). Otherwise pass a null pointer lvalue.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5918 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing all reduction items.
/// \param N Index of the reduction item this combiner is emitted for.
/// \param ReductionOp Combiner expression; \p LHS and \p RHS are the
/// DeclRefExprs it references, \p PrivateRef the item's private copy.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5996 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper describing all reduction items.
/// \param N Index of the reduction item this finalizer is emitted for.
/// \return The finalizer function, or nullptr when item \p N needs no
/// cleanups (so the runtime can skip the call entirely).
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // Nothing to destroy => no finalizer is emitted.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6045 
/// Emits the runtime registration of task reduction items: builds one
/// kmp_taskred_input_t record per reduction item and passes the resulting
/// array to __kmpc_taskred_init (or __kmpc_taskred_modifier_init when a task
/// reduction modifier is involved). Returns the runtime call's result (the
/// void* reduction descriptor), or nullptr if there is nothing to emit.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to do without an insertion point or without reduction items.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one kmp_taskred_input_t element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // The finalizer may be null when the item requires no cleanups.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    // Flag value 1 marks items that need delayed creation (dynamic size).
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6174 
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  // is_ws flag: 1 for worksharing reductions, 0 otherwise.
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
6192 
6193 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6194                                               SourceLocation Loc,
6195                                               ReductionCodeGen &RCG,
6196                                               unsigned N) {
6197   auto Sizes = RCG.getSizes(N);
6198   // Emit threadprivate global variable if the type is non-constant
6199   // (Sizes.second = nullptr).
6200   if (Sizes.second) {
6201     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6202                                                      /*isSigned=*/false);
6203     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6204         CGF, CGM.getContext().getSizeType(),
6205         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6206     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6207   }
6208 }
6209 
6210 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6211                                               SourceLocation Loc,
6212                                               llvm::Value *ReductionsPtr,
6213                                               LValue SharedLVal) {
6214   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6215   // *d);
6216   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6217                                                    CGM.IntTy,
6218                                                    /*isSigned=*/true),
6219                          ReductionsPtr,
6220                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6221                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6222   return Address(
6223       CGF.EmitRuntimeCall(
6224           OMPBuilder.getOrCreateRuntimeFunction(
6225               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6226           Args),
6227       SharedLVal.getAlignment());
6228 }
6229 
6230 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6231                                        SourceLocation Loc) {
6232   if (!CGF.HaveInsertPoint())
6233     return;
6234 
6235   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6236     OMPBuilder.createTaskwait(CGF.Builder);
6237   } else {
6238     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6239     // global_tid);
6240     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6241     // Ignore return result until untied tasks are supported.
6242     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6243                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6244                         Args);
6245   }
6246 
6247   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6248     Region->emitUntiedSwitch(CGF);
6249 }
6250 
6251 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6252                                            OpenMPDirectiveKind InnerKind,
6253                                            const RegionCodeGenTy &CodeGen,
6254                                            bool HasCancel) {
6255   if (!CGF.HaveInsertPoint())
6256     return;
6257   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6258                                  InnerKind != OMPD_critical &&
6259                                      InnerKind != OMPD_master &&
6260                                      InnerKind != OMPD_masked);
6261   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6262 }
6263 
namespace {
/// Cancellation kind codes passed as the 'cncl_kind' argument of the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime calls.
enum RTCancelKind {
  CancelNoreq = 0,     // no cancellation requested
  CancelParallel = 1,  // cancel 'parallel' region
  CancelLoop = 2,      // cancel worksharing loop ('for')
  CancelSections = 3,  // cancel 'sections' construct
  CancelTaskgroup = 4  // cancel 'taskgroup'
};
} // anonymous namespace
6273 
6274 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6275   RTCancelKind CancelKind = CancelNoreq;
6276   if (CancelRegion == OMPD_parallel)
6277     CancelKind = CancelParallel;
6278   else if (CancelRegion == OMPD_for)
6279     CancelKind = CancelLoop;
6280   else if (CancelRegion == OMPD_sections)
6281     CancelKind = CancelSections;
6282   else {
6283     assert(CancelRegion == OMPD_taskgroup);
6284     CancelKind = CancelTaskgroup;
6285   }
6286   return CancelKind;
6287 }
6288 
/// Emit a __kmpc_cancellationpoint call and, when the runtime reports that
/// cancellation was activated, branch out of the enclosing construct (with a
/// cancellation barrier first for 'parallel' regions).
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // A nonzero result means cancellation was requested: take the exit path.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6328 
/// Emit a __kmpc_cancel call (optionally guarded by an 'if' clause condition)
/// and branch out of the enclosing construct when cancellation is activated.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Codegen for the actual cancel; run unconditionally, or only in the
    // "then" branch of the 'if' clause (see below).
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // A nonzero result means cancellation was activated: take the exit path.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Guard the cancel with the 'if' clause condition; the else branch is a
      // no-op.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6374 
6375 namespace {
6376 /// Cleanup action for uses_allocators support.
6377 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6378   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6379 
6380 public:
6381   OMPUsesAllocatorsActionTy(
6382       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6383       : Allocators(Allocators) {}
6384   void Enter(CodeGenFunction &CGF) override {
6385     if (!CGF.HaveInsertPoint())
6386       return;
6387     for (const auto &AllocatorData : Allocators) {
6388       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6389           CGF, AllocatorData.first, AllocatorData.second);
6390     }
6391   }
6392   void Exit(CodeGenFunction &CGF) override {
6393     if (!CGF.HaveInsertPoint())
6394       return;
6395     for (const auto &AllocatorData : Allocators) {
6396       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6397                                                         AllocatorData.first);
6398     }
6399   }
6400 };
6401 } // namespace
6402 
6403 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6404     const OMPExecutableDirective &D, StringRef ParentName,
6405     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6406     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6407   assert(!ParentName.empty() && "Invalid target region parent name!");
6408   HasEmittedTargetRegion = true;
6409   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6410   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6411     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6412       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6413       if (!D.AllocatorTraits)
6414         continue;
6415       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6416     }
6417   }
6418   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6419   CodeGen.setAction(UsesAllocatorAction);
6420   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6421                                    IsOffloadEntry, CodeGen);
6422 }
6423 
6424 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6425                                              const Expr *Allocator,
6426                                              const Expr *AllocatorTraits) {
6427   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6428   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6429   // Use default memspace handle.
6430   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6431   llvm::Value *NumTraits = llvm::ConstantInt::get(
6432       CGF.IntTy, cast<ConstantArrayType>(
6433                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6434                      ->getSize()
6435                      .getLimitedValue());
6436   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6437   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6438       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
6439   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6440                                            AllocatorTraitsLVal.getBaseInfo(),
6441                                            AllocatorTraitsLVal.getTBAAInfo());
6442   llvm::Value *Traits =
6443       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6444 
6445   llvm::Value *AllocatorVal =
6446       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6447                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6448                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6449   // Store to allocator.
6450   CGF.EmitVarDecl(*cast<VarDecl>(
6451       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6452   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6453   AllocatorVal =
6454       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6455                                Allocator->getType(), Allocator->getExprLoc());
6456   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6457 }
6458 
6459 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6460                                              const Expr *Allocator) {
6461   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6462   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6463   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6464   llvm::Value *AllocatorVal =
6465       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6466   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6467                                           CGF.getContext().VoidPtrTy,
6468                                           Allocator->getExprLoc());
6469   (void)CGF.EmitRuntimeCall(
6470       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6471                                             OMPRTL___kmpc_destroy_allocator),
6472       {ThreadId, AllocatorVal});
6473 }
6474 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured target-region body into a function with that name.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // On the host the ID is a uniquely named constant byte global.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6543 
6544 /// Checks if the expression is constant or does not have non-trivial function
6545 /// calls.
6546 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6547   // We can skip constant expressions.
6548   // We can skip expressions with trivial calls or simple expressions.
6549   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6550           !E->hasNonTrivialCall(Ctx)) &&
6551          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6552 }
6553 
/// Returns the single "meaningful" statement nested inside \p Body after
/// stripping containers and compound statements, or nullptr if there are
/// several such statements.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Descend through nested compound statements; at each level keep at most
  // one non-ignorable child.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Trivial expressions (see isTrivial) do not count as children.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A declaration statement is ignorable when every declared entity is
        // a non-variable declaration, a global, or an unused variable.
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6595 
6596 /// Emit the number of teams for a target directive.  Inspect the num_teams
6597 /// clause associated with a teams construct combined or closely nested
6598 /// with the target directive.
6599 ///
6600 /// Emit a team of size one for directives such as 'target parallel' that
6601 /// have no associated teams construct.
6602 ///
6603 /// Otherwise, return nullptr.
6604 static llvm::Value *
6605 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6606                                const OMPExecutableDirective &D) {
6607   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6608          "Clauses associated with the teams directive expected to be emitted "
6609          "only for the host!");
6610   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6611   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6612          "Expected target-based executable directive.");
6613   CGBuilderTy &Bld = CGF.Builder;
6614   switch (DirectiveKind) {
6615   case OMPD_target: {
6616     const auto *CS = D.getInnermostCapturedStmt();
6617     const auto *Body =
6618         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6619     const Stmt *ChildStmt =
6620         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6621     if (const auto *NestedDir =
6622             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6623       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6624         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6625           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6626           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6627           const Expr *NumTeams =
6628               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6629           llvm::Value *NumTeamsVal =
6630               CGF.EmitScalarExpr(NumTeams,
6631                                  /*IgnoreResultAssign*/ true);
6632           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6633                                    /*isSigned=*/true);
6634         }
6635         return Bld.getInt32(0);
6636       }
6637       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6638           isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6639         return Bld.getInt32(1);
6640       return Bld.getInt32(0);
6641     }
6642     return nullptr;
6643   }
6644   case OMPD_target_teams:
6645   case OMPD_target_teams_distribute:
6646   case OMPD_target_teams_distribute_simd:
6647   case OMPD_target_teams_distribute_parallel_for:
6648   case OMPD_target_teams_distribute_parallel_for_simd: {
6649     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6650       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6651       const Expr *NumTeams =
6652           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6653       llvm::Value *NumTeamsVal =
6654           CGF.EmitScalarExpr(NumTeams,
6655                              /*IgnoreResultAssign*/ true);
6656       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6657                                /*isSigned=*/true);
6658     }
6659     return Bld.getInt32(0);
6660   }
6661   case OMPD_target_parallel:
6662   case OMPD_target_parallel_for:
6663   case OMPD_target_parallel_for_simd:
6664   case OMPD_target_simd:
6665     return Bld.getInt32(1);
6666   case OMPD_parallel:
6667   case OMPD_for:
6668   case OMPD_parallel_for:
6669   case OMPD_parallel_master:
6670   case OMPD_parallel_sections:
6671   case OMPD_for_simd:
6672   case OMPD_parallel_for_simd:
6673   case OMPD_cancel:
6674   case OMPD_cancellation_point:
6675   case OMPD_ordered:
6676   case OMPD_threadprivate:
6677   case OMPD_allocate:
6678   case OMPD_task:
6679   case OMPD_simd:
6680   case OMPD_tile:
6681   case OMPD_unroll:
6682   case OMPD_sections:
6683   case OMPD_section:
6684   case OMPD_single:
6685   case OMPD_master:
6686   case OMPD_critical:
6687   case OMPD_taskyield:
6688   case OMPD_barrier:
6689   case OMPD_taskwait:
6690   case OMPD_taskgroup:
6691   case OMPD_atomic:
6692   case OMPD_flush:
6693   case OMPD_depobj:
6694   case OMPD_scan:
6695   case OMPD_teams:
6696   case OMPD_target_data:
6697   case OMPD_target_exit_data:
6698   case OMPD_target_enter_data:
6699   case OMPD_distribute:
6700   case OMPD_distribute_simd:
6701   case OMPD_distribute_parallel_for:
6702   case OMPD_distribute_parallel_for_simd:
6703   case OMPD_teams_distribute:
6704   case OMPD_teams_distribute_simd:
6705   case OMPD_teams_distribute_parallel_for:
6706   case OMPD_teams_distribute_parallel_for_simd:
6707   case OMPD_target_update:
6708   case OMPD_declare_simd:
6709   case OMPD_declare_variant:
6710   case OMPD_begin_declare_variant:
6711   case OMPD_end_declare_variant:
6712   case OMPD_declare_target:
6713   case OMPD_end_declare_target:
6714   case OMPD_declare_reduction:
6715   case OMPD_declare_mapper:
6716   case OMPD_taskloop:
6717   case OMPD_taskloop_simd:
6718   case OMPD_master_taskloop:
6719   case OMPD_master_taskloop_simd:
6720   case OMPD_parallel_master_taskloop:
6721   case OMPD_parallel_master_taskloop_simd:
6722   case OMPD_requires:
6723   case OMPD_unknown:
6724     break;
6725   default:
6726     break;
6727   }
6728   llvm_unreachable("Unexpected directive kind.");
6729 }
6730 
6731 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6732                                   llvm::Value *DefaultThreadLimitVal) {
6733   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6734       CGF.getContext(), CS->getCapturedStmt());
6735   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6736     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6737       llvm::Value *NumThreads = nullptr;
6738       llvm::Value *CondVal = nullptr;
6739       // Handle if clause. If if clause present, the number of threads is
6740       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6741       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6742         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6743         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6744         const OMPIfClause *IfClause = nullptr;
6745         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6746           if (C->getNameModifier() == OMPD_unknown ||
6747               C->getNameModifier() == OMPD_parallel) {
6748             IfClause = C;
6749             break;
6750           }
6751         }
6752         if (IfClause) {
6753           const Expr *Cond = IfClause->getCondition();
6754           bool Result;
6755           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6756             if (!Result)
6757               return CGF.Builder.getInt32(1);
6758           } else {
6759             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6760             if (const auto *PreInit =
6761                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6762               for (const auto *I : PreInit->decls()) {
6763                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6764                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6765                 } else {
6766                   CodeGenFunction::AutoVarEmission Emission =
6767                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6768                   CGF.EmitAutoVarCleanups(Emission);
6769                 }
6770               }
6771             }
6772             CondVal = CGF.EvaluateExprAsBool(Cond);
6773           }
6774         }
6775       }
6776       // Check the value of num_threads clause iff if clause was not specified
6777       // or is not evaluated to false.
6778       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6779         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6780         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6781         const auto *NumThreadsClause =
6782             Dir->getSingleClause<OMPNumThreadsClause>();
6783         CodeGenFunction::LexicalScope Scope(
6784             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6785         if (const auto *PreInit =
6786                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6787           for (const auto *I : PreInit->decls()) {
6788             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6789               CGF.EmitVarDecl(cast<VarDecl>(*I));
6790             } else {
6791               CodeGenFunction::AutoVarEmission Emission =
6792                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6793               CGF.EmitAutoVarCleanups(Emission);
6794             }
6795           }
6796         }
6797         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6798         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6799                                                /*isSigned=*/false);
6800         if (DefaultThreadLimitVal)
6801           NumThreads = CGF.Builder.CreateSelect(
6802               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6803               DefaultThreadLimitVal, NumThreads);
6804       } else {
6805         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6806                                            : CGF.Builder.getInt32(0);
6807       }
6808       // Process condition of the if clause.
6809       if (CondVal) {
6810         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6811                                               CGF.Builder.getInt32(1));
6812       }
6813       return NumThreads;
6814     }
6815     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6816       return CGF.Builder.getInt32(1);
6817     return DefaultThreadLimitVal;
6818   }
6819   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6820                                : CGF.Builder.getInt32(0);
6821 }
6822 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// The returned value (when non-null) is an i32; the value 0 means "no
/// specific limit".  Must only be called on the host (see assertion below).
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look at the single directive nested inside the captured
    // statement (if any) to derive a thread count.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested thread_limit clause bounds the result.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit the clause's captured pre-init declarations first so the
        // thread_limit expression can refer to them.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a teams (non-distribute) directive, descend one more level to
      // inspect what is nested inside the teams region.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A simd region is executed by a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Look through a nested 'distribute' to find the parallel region.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Pick the 'if' clause that applies to 'parallel' (unmodified, or with
      // the 'parallel' name modifier).
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: single thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Result is min(num_threads, thread_limit) when both are present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd-only regions are executed by a single thread.
    return Bld.getInt32(1);
  // None of the following are target execution directives; reaching this
  // function with any of them is a bug (see the llvm_unreachable below).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7049 
7050 namespace {
7051 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7052 
7053 // Utility to handle information from clauses associated with a given
7054 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7055 // It provides a convenient interface to obtain the information and generate
7056 // code for that information.
7057 class MappableExprsHandler {
7058 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  ///
  /// NOTE(review): these bit values appear to mirror the map-type flags
  /// understood by the offloading runtime (libomptarget) — keep them in
  /// sync; confirm against the runtime's header before changing any value.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7106 
7107   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7108   static unsigned getFlagMemberOffset() {
7109     unsigned Offset = 0;
7110     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7111          Remain = Remain >> 1)
7112       Offset++;
7113     return Offset;
7114   }
7115 
7116   /// Class that holds debugging information for a data mapping to be passed to
7117   /// the runtime library.
7118   class MappingExprInfo {
7119     /// The variable declaration used for the data mapping.
7120     const ValueDecl *MapDecl = nullptr;
7121     /// The original expression used in the map clause, or null if there is
7122     /// none.
7123     const Expr *MapExpr = nullptr;
7124 
7125   public:
7126     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7127         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7128 
7129     const ValueDecl *getMapDecl() const { return MapDecl; }
7130     const Expr *getMapExpr() const { return MapExpr; }
7131   };
7132 
7133   /// Class that associates information with a base pointer to be passed to the
7134   /// runtime library.
7135   class BasePointerInfo {
7136     /// The base pointer.
7137     llvm::Value *Ptr = nullptr;
7138     /// The base declaration that refers to this device pointer, or null if
7139     /// there is none.
7140     const ValueDecl *DevPtrDecl = nullptr;
7141 
7142   public:
7143     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7144         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7145     llvm::Value *operator*() const { return Ptr; }
7146     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7147     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7148   };
7149 
  // Convenience aliases for the parallel arrays that together describe a set
  // of mappings (combined in MapCombinedInfoTy).
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7157 
7158   /// This structure contains combined information generated for mappable
7159   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7160   /// mappers, and non-contiguous information.
7161   struct MapCombinedInfoTy {
7162     struct StructNonContiguousInfo {
7163       bool IsNonContiguous = false;
7164       MapDimArrayTy Dims;
7165       MapNonContiguousArrayTy Offsets;
7166       MapNonContiguousArrayTy Counts;
7167       MapNonContiguousArrayTy Strides;
7168     };
7169     MapExprsArrayTy Exprs;
7170     MapBaseValuesArrayTy BasePointers;
7171     MapValuesArrayTy Pointers;
7172     MapValuesArrayTy Sizes;
7173     MapFlagsArrayTy Types;
7174     MapMappersArrayTy Mappers;
7175     StructNonContiguousInfo NonContigInfo;
7176 
7177     /// Append arrays in \a CurInfo.
7178     void append(MapCombinedInfoTy &CurInfo) {
7179       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7180       BasePointers.append(CurInfo.BasePointers.begin(),
7181                           CurInfo.BasePointers.end());
7182       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7183       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7184       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7185       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7186       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7187                                  CurInfo.NonContigInfo.Dims.end());
7188       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7189                                     CurInfo.NonContigInfo.Offsets.end());
7190       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7191                                    CurInfo.NonContigInfo.Counts.end());
7192       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7193                                     CurInfo.NonContigInfo.Strides.end());
7194     }
7195   };
7196 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Mapping information gathered for the struct ahead of its members.
    MapCombinedInfoTy PreliminaryMapData;
    // Field index and address of the lowest mapped element.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Field index and address of the highest mapped element.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    // Lower-bound address of the mapped range — presumably the start of the
    // mapped region within the struct; confirm against users of this struct.
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };
7212 
7213 private:
  /// All the information collected for a single mappable expression: its
  /// component list, map type and modifiers, and related attributes.
  /// (Previous comment described device-pointer return kinds, which does not
  /// match these fields.)
  struct MapInfo {
    // Component list (base and sub-elements) of the mappable expression.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    // Map type of the clause this entry came from (to/from/tofrom/...).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    // Map-type modifiers (always, close, present, ...).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    // Motion modifiers from to/from clauses.
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    // Whether the runtime should return the device pointer for this entry.
    bool ReturnDevicePointer = false;
    // True when the mapping was generated implicitly rather than written.
    bool IsImplicit = false;
    // User-defined mapper associated with this mapping, if any.
    const ValueDecl *Mapper = nullptr;
    // Original variable-reference expression, if any.
    const Expr *VarRef = nullptr;
    // Distinguishes use_device_addr from use_device_ptr handling.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7240 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // Expression through which the member is accessed.
    const Expr *IE = nullptr;
    // Declaration named by the use_device_ptr/use_device_addr clause.
    const ValueDecl *VD = nullptr;
    // True for use_device_addr entries, false for use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7253 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7274 
  /// Compute the size in bytes of the entity denoted by expression \p E, as
  /// an llvm::Value suitable for use as a map size.  Handles array-shaping
  /// expressions, array sections (including '[lb:]' sections whose length is
  /// computed at runtime) and plain expressions via CGF.getTypeSize.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Size = element size * product of all dimension extents.
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        // NOTE(review): cast<> already asserts on failure, so this assert is
        // redundant; kept for documentation value only.
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: Size = length * element size.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Clamp to zero when lb would run past the end of the base object.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7349 
7350   /// Return the corresponding bits for a given map clause modifier. Add
7351   /// a flag marking the map as a pointer if requested. Add a flag marking the
7352   /// map as the first one of a series of maps that relate to the same map
7353   /// expression.
7354   OpenMPOffloadMappingFlags getMapTypeBits(
7355       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7356       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7357       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7358     OpenMPOffloadMappingFlags Bits =
7359         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7360     switch (MapType) {
7361     case OMPC_MAP_alloc:
7362     case OMPC_MAP_release:
7363       // alloc and release is the default behavior in the runtime library,  i.e.
7364       // if we don't pass any bits alloc/release that is what the runtime is
7365       // going to do. Therefore, we don't need to signal anything for these two
7366       // type modifiers.
7367       break;
7368     case OMPC_MAP_to:
7369       Bits |= OMP_MAP_TO;
7370       break;
7371     case OMPC_MAP_from:
7372       Bits |= OMP_MAP_FROM;
7373       break;
7374     case OMPC_MAP_tofrom:
7375       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7376       break;
7377     case OMPC_MAP_delete:
7378       Bits |= OMP_MAP_DELETE;
7379       break;
7380     case OMPC_MAP_unknown:
7381       llvm_unreachable("Unexpected map type!");
7382     }
7383     if (AddPtrFlag)
7384       Bits |= OMP_MAP_PTR_AND_OBJ;
7385     if (AddIsTargetParamFlag)
7386       Bits |= OMP_MAP_TARGET_PARAM;
7387     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7388         != MapModifiers.end())
7389       Bits |= OMP_MAP_ALWAYS;
7390     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7391         != MapModifiers.end())
7392       Bits |= OMP_MAP_CLOSE;
7393     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) !=
7394             MapModifiers.end() ||
7395         llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) !=
7396             MotionModifiers.end())
7397       Bits |= OMP_MAP_PRESENT;
7398     if (IsNonContiguous)
7399       Bits |= OMP_MAP_NON_CONTIG;
7400     return Bits;
7401   }
7402 
7403   /// Return true if the provided expression is a final array section. A
7404   /// final array section, is one whose length can't be proved to be one.
7405   bool isFinalArraySectionExpression(const Expr *E) const {
7406     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7407 
7408     // It is not an array section and therefore not a unity-size one.
7409     if (!OASE)
7410       return false;
7411 
7412     // An array section with no colon always refer to a single element.
7413     if (OASE->getColonLocFirst().isInvalid())
7414       return false;
7415 
7416     const Expr *Length = OASE->getLength();
7417 
7418     // If we don't have a length we have to check if the array has size 1
7419     // for this dimension. Also, we should always expect a length if the
7420     // base type is pointer.
7421     if (!Length) {
7422       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7423                              OASE->getBase()->IgnoreParenImpCasts())
7424                              .getCanonicalType();
7425       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7426         return ATy->getSize().getSExtValue() != 1;
7427       // If we don't have a constant dimension length, we have to consider
7428       // the current section as having any size, so it is not necessarily
7429       // unitary. If it happen to be unity size, that's user fault.
7430       return true;
7431     }
7432 
7433     // Check if the length evaluates to 1.
7434     Expr::EvalResult Result;
7435     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7436       return true; // Can have more that size 1.
7437 
7438     llvm::APSInt ConstLength = Result.Val.getInt();
7439     return ConstLength.getSExtValue() != 1;
7440   }
7441 
7442   /// Generate the base pointers, section pointers, sizes, map type bits, and
7443   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7444   /// map type, map or motion modifiers, and expression components.
7445   /// \a IsFirstComponent should be set to true if the provided set of
7446   /// components is the first associated with a capture.
7447   void generateInfoForComponentList(
7448       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7449       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7450       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7451       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7452       bool IsFirstComponentList, bool IsImplicit,
7453       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7454       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7455       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7456           OverlappedElements = llvm::None) const {
7457     // The following summarizes what has to be generated for each map and the
7458     // types below. The generated information is expressed in this order:
7459     // base pointer, section pointer, size, flags
7460     // (to add to the ones that come from the map type and modifier).
7461     //
7462     // double d;
7463     // int i[100];
7464     // float *p;
7465     //
7466     // struct S1 {
7467     //   int i;
7468     //   float f[50];
7469     // }
7470     // struct S2 {
7471     //   int i;
7472     //   float f[50];
7473     //   S1 s;
7474     //   double *p;
7475     //   struct S2 *ps;
7476     //   int &ref;
7477     // }
7478     // S2 s;
7479     // S2 *ps;
7480     //
7481     // map(d)
7482     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7483     //
7484     // map(i)
7485     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7486     //
7487     // map(i[1:23])
7488     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7489     //
7490     // map(p)
7491     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7492     //
7493     // map(p[1:24])
7494     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7495     // in unified shared memory mode or for local pointers
7496     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7497     //
7498     // map(s)
7499     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7500     //
7501     // map(s.i)
7502     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7503     //
7504     // map(s.s.f)
7505     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7506     //
7507     // map(s.p)
7508     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7509     //
7510     // map(to: s.p[:22])
7511     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7512     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7513     // &(s.p), &(s.p[0]), 22*sizeof(double),
7514     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7515     // (*) alloc space for struct members, only this is a target parameter
7516     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7517     //      optimizes this entry out, same in the examples below)
7518     // (***) map the pointee (map: to)
7519     //
7520     // map(to: s.ref)
7521     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7522     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7523     // (*) alloc space for struct members, only this is a target parameter
7524     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7525     //      optimizes this entry out, same in the examples below)
7526     // (***) map the pointee (map: to)
7527     //
7528     // map(s.ps)
7529     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7530     //
7531     // map(from: s.ps->s.i)
7532     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7533     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7534     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7535     //
7536     // map(to: s.ps->ps)
7537     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7538     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7539     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7540     //
7541     // map(s.ps->ps->ps)
7542     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7543     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7544     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7545     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7546     //
7547     // map(to: s.ps->ps->s.f[:22])
7548     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7549     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7550     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7551     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7552     //
7553     // map(ps)
7554     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7555     //
7556     // map(ps->i)
7557     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7558     //
7559     // map(ps->s.f)
7560     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7561     //
7562     // map(from: ps->p)
7563     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7564     //
7565     // map(to: ps->p[:22])
7566     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7567     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7568     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7569     //
7570     // map(ps->ps)
7571     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7572     //
7573     // map(from: ps->ps->s.i)
7574     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7575     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7576     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7577     //
7578     // map(from: ps->ps->ps)
7579     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7580     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7581     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7582     //
7583     // map(ps->ps->ps->ps)
7584     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7585     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7586     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7587     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7588     //
7589     // map(to: ps->ps->ps->s.f[:22])
7590     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7591     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7592     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7593     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7594     //
7595     // map(to: s.f[:22]) map(from: s.p[:33])
7596     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7597     //     sizeof(double*) (**), TARGET_PARAM
7598     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7599     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7600     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7601     // (*) allocate contiguous space needed to fit all mapped members even if
7602     //     we allocate space for members not mapped (in this example,
7603     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7604     //     them as well because they fall between &s.f[0] and &s.p)
7605     //
7606     // map(from: s.f[:22]) map(to: ps->p[:33])
7607     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7608     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7609     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7610     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7611     // (*) the struct this entry pertains to is the 2nd element in the list of
7612     //     arguments, hence MEMBER_OF(2)
7613     //
7614     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7615     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7616     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7617     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7618     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7619     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7620     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7621     // (*) the struct this entry pertains to is the 4th element in the list
7622     //     of arguments, hence MEMBER_OF(4)
7623 
7624     // Track if the map information being generated is the first for a capture.
7625     bool IsCaptureFirstInfo = IsFirstComponentList;
7626     // When the variable is on a declare target link or in a to clause with
7627     // unified memory, a reference is needed to hold the host/device address
7628     // of the variable.
7629     bool RequiresReference = false;
7630 
7631     // Scan the components from the base to the complete expression.
7632     auto CI = Components.rbegin();
7633     auto CE = Components.rend();
7634     auto I = CI;
7635 
7636     // Track if the map information being generated is the first for a list of
7637     // components.
7638     bool IsExpressionFirstInfo = true;
7639     bool FirstPointerInComplexData = false;
7640     Address BP = Address::invalid();
7641     const Expr *AssocExpr = I->getAssociatedExpression();
7642     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7643     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7644     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7645 
7646     if (isa<MemberExpr>(AssocExpr)) {
7647       // The base is the 'this' pointer. The content of the pointer is going
7648       // to be the base of the field being mapped.
7649       BP = CGF.LoadCXXThisAddress();
7650     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7651                (OASE &&
7652                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7653       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7654     } else if (OAShE &&
7655                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7656       BP = Address(
7657           CGF.EmitScalarExpr(OAShE->getBase()),
7658           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7659     } else {
7660       // The base is the reference to the variable.
7661       // BP = &Var.
7662       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7663       if (const auto *VD =
7664               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7665         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7666                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7667           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7668               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7669                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7670             RequiresReference = true;
7671             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7672           }
7673         }
7674       }
7675 
7676       // If the variable is a pointer and is being dereferenced (i.e. is not
7677       // the last component), the base has to be the pointer itself, not its
7678       // reference. References are ignored for mapping purposes.
7679       QualType Ty =
7680           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7681       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7682         // No need to generate individual map information for the pointer, it
7683         // can be associated with the combined storage if shared memory mode is
7684         // active or the base declaration is not global variable.
7685         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7686         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7687             !VD || VD->hasLocalStorage())
7688           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7689         else
7690           FirstPointerInComplexData = true;
7691         ++I;
7692       }
7693     }
7694 
7695     // Track whether a component of the list should be marked as MEMBER_OF some
7696     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7697     // in a component list should be marked as MEMBER_OF, all subsequent entries
7698     // do not belong to the base struct. E.g.
7699     // struct S2 s;
7700     // s.ps->ps->ps->f[:]
7701     //   (1) (2) (3) (4)
7702     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7703     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7704     // is the pointee of ps(2) which is not member of struct s, so it should not
7705     // be marked as such (it is still PTR_AND_OBJ).
7706     // The variable is initialized to false so that PTR_AND_OBJ entries which
7707     // are not struct members are not considered (e.g. array of pointers to
7708     // data).
7709     bool ShouldBeMemberOf = false;
7710 
7711     // Variable keeping track of whether or not we have encountered a component
7712     // in the component list which is a member expression. Useful when we have a
7713     // pointer or a final array section, in which case it is the previous
7714     // component in the list which tells us whether we have a member expression.
7715     // E.g. X.f[:]
7716     // While processing the final array section "[:]" it is "f" which tells us
7717     // whether we are dealing with a member of a declared struct.
7718     const MemberExpr *EncounteredME = nullptr;
7719 
7720     // Track for the total number of dimension. Start from one for the dummy
7721     // dimension.
7722     uint64_t DimSize = 1;
7723 
7724     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7725     bool IsPrevMemberReference = false;
7726 
7727     for (; I != CE; ++I) {
7728       // If the current component is member of a struct (parent struct) mark it.
7729       if (!EncounteredME) {
7730         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7731         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7732         // as MEMBER_OF the parent struct.
7733         if (EncounteredME) {
7734           ShouldBeMemberOf = true;
7735           // Do not emit as complex pointer if this is actually not array-like
7736           // expression.
7737           if (FirstPointerInComplexData) {
7738             QualType Ty = std::prev(I)
7739                               ->getAssociatedDeclaration()
7740                               ->getType()
7741                               .getNonReferenceType();
7742             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7743             FirstPointerInComplexData = false;
7744           }
7745         }
7746       }
7747 
7748       auto Next = std::next(I);
7749 
7750       // We need to generate the addresses and sizes if this is the last
7751       // component, if the component is a pointer or if it is an array section
7752       // whose length can't be proved to be one. If this is a pointer, it
7753       // becomes the base address for the following components.
7754 
7755       // A final array section, is one whose length can't be proved to be one.
7756       // If the map item is non-contiguous then we don't treat any array section
7757       // as final array section.
7758       bool IsFinalArraySection =
7759           !IsNonContiguous &&
7760           isFinalArraySectionExpression(I->getAssociatedExpression());
7761 
7762       // If we have a declaration for the mapping use that, otherwise use
7763       // the base declaration of the map clause.
7764       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7765                                      ? I->getAssociatedDeclaration()
7766                                      : BaseDecl;
7767       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7768                                                : MapExpr;
7769 
7770       // Get information on whether the element is a pointer. Have to do a
7771       // special treatment for array sections given that they are built-in
7772       // types.
7773       const auto *OASE =
7774           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7775       const auto *OAShE =
7776           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7777       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7778       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7779       bool IsPointer =
7780           OAShE ||
7781           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7782                        .getCanonicalType()
7783                        ->isAnyPointerType()) ||
7784           I->getAssociatedExpression()->getType()->isAnyPointerType();
7785       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7786                                MapDecl &&
7787                                MapDecl->getType()->isLValueReferenceType();
7788       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7789 
7790       if (OASE)
7791         ++DimSize;
7792 
7793       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7794           IsFinalArraySection) {
7795         // If this is not the last component, we expect the pointer to be
7796         // associated with an array expression or member expression.
7797         assert((Next == CE ||
7798                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7799                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7800                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7801                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7802                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7803                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7804                "Unexpected expression");
7805 
7806         Address LB = Address::invalid();
7807         Address LowestElem = Address::invalid();
7808         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7809                                        const MemberExpr *E) {
7810           const Expr *BaseExpr = E->getBase();
7811           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7812           // scalar.
7813           LValue BaseLV;
7814           if (E->isArrow()) {
7815             LValueBaseInfo BaseInfo;
7816             TBAAAccessInfo TBAAInfo;
7817             Address Addr =
7818                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7819             QualType PtrTy = BaseExpr->getType()->getPointeeType();
7820             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7821           } else {
7822             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7823           }
7824           return BaseLV;
7825         };
7826         if (OAShE) {
7827           LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7828                                     CGF.getContext().getTypeAlignInChars(
7829                                         OAShE->getBase()->getType()));
7830         } else if (IsMemberReference) {
7831           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7832           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7833           LowestElem = CGF.EmitLValueForFieldInitialization(
7834                               BaseLVal, cast<FieldDecl>(MapDecl))
7835                            .getAddress(CGF);
7836           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7837                    .getAddress(CGF);
7838         } else {
7839           LowestElem = LB =
7840               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7841                   .getAddress(CGF);
7842         }
7843 
7844         // If this component is a pointer inside the base struct then we don't
7845         // need to create any entry for it - it will be combined with the object
7846         // it is pointing to into a single PTR_AND_OBJ entry.
7847         bool IsMemberPointerOrAddr =
7848             EncounteredME &&
7849             (((IsPointer || ForDeviceAddr) &&
7850               I->getAssociatedExpression() == EncounteredME) ||
7851              (IsPrevMemberReference && !IsPointer) ||
7852              (IsMemberReference && Next != CE &&
7853               !Next->getAssociatedExpression()->getType()->isPointerType()));
7854         if (!OverlappedElements.empty() && Next == CE) {
7855           // Handle base element with the info for overlapped elements.
7856           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7857           assert(!IsPointer &&
7858                  "Unexpected base element with the pointer type.");
7859           // Mark the whole struct as the struct that requires allocation on the
7860           // device.
7861           PartialStruct.LowestElem = {0, LowestElem};
7862           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7863               I->getAssociatedExpression()->getType());
7864           Address HB = CGF.Builder.CreateConstGEP(
7865               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
7866                                                               CGF.VoidPtrTy),
7867               TypeSize.getQuantity() - 1);
7868           PartialStruct.HighestElem = {
7869               std::numeric_limits<decltype(
7870                   PartialStruct.HighestElem.first)>::max(),
7871               HB};
7872           PartialStruct.Base = BP;
7873           PartialStruct.LB = LB;
7874           assert(
7875               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7876               "Overlapped elements must be used only once for the variable.");
7877           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7878           // Emit data for non-overlapped data.
7879           OpenMPOffloadMappingFlags Flags =
7880               OMP_MAP_MEMBER_OF |
7881               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7882                              /*AddPtrFlag=*/false,
7883                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7884           llvm::Value *Size = nullptr;
7885           // Do bitcopy of all non-overlapped structure elements.
7886           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7887                    Component : OverlappedElements) {
7888             Address ComponentLB = Address::invalid();
7889             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7890                  Component) {
7891               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7892                 const auto *FD = dyn_cast<FieldDecl>(VD);
7893                 if (FD && FD->getType()->isLValueReferenceType()) {
7894                   const auto *ME =
7895                       cast<MemberExpr>(MC.getAssociatedExpression());
7896                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7897                   ComponentLB =
7898                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7899                           .getAddress(CGF);
7900                 } else {
7901                   ComponentLB =
7902                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7903                           .getAddress(CGF);
7904                 }
7905                 Size = CGF.Builder.CreatePtrDiff(
7906                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7907                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7908                 break;
7909               }
7910             }
7911             assert(Size && "Failed to determine structure size");
7912             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7913             CombinedInfo.BasePointers.push_back(BP.getPointer());
7914             CombinedInfo.Pointers.push_back(LB.getPointer());
7915             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7916                 Size, CGF.Int64Ty, /*isSigned=*/true));
7917             CombinedInfo.Types.push_back(Flags);
7918             CombinedInfo.Mappers.push_back(nullptr);
7919             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7920                                                                       : 1);
7921             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7922           }
7923           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7924           CombinedInfo.BasePointers.push_back(BP.getPointer());
7925           CombinedInfo.Pointers.push_back(LB.getPointer());
7926           Size = CGF.Builder.CreatePtrDiff(
7927               CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
7928               CGF.EmitCastToVoidPtr(LB.getPointer()));
7929           CombinedInfo.Sizes.push_back(
7930               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7931           CombinedInfo.Types.push_back(Flags);
7932           CombinedInfo.Mappers.push_back(nullptr);
7933           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7934                                                                     : 1);
7935           break;
7936         }
7937         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7938         if (!IsMemberPointerOrAddr ||
7939             (Next == CE && MapType != OMPC_MAP_unknown)) {
7940           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7941           CombinedInfo.BasePointers.push_back(BP.getPointer());
7942           CombinedInfo.Pointers.push_back(LB.getPointer());
7943           CombinedInfo.Sizes.push_back(
7944               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7945           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7946                                                                     : 1);
7947 
7948           // If Mapper is valid, the last component inherits the mapper.
7949           bool HasMapper = Mapper && Next == CE;
7950           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7951 
7952           // We need to add a pointer flag for each map that comes from the
7953           // same expression except for the first one. We also need to signal
7954           // this map is the first one that relates with the current capture
7955           // (there is a set of entries for each capture).
7956           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7957               MapType, MapModifiers, MotionModifiers, IsImplicit,
7958               !IsExpressionFirstInfo || RequiresReference ||
7959                   FirstPointerInComplexData || IsMemberReference,
7960               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7961 
7962           if (!IsExpressionFirstInfo || IsMemberReference) {
7963             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7964             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7965             if (IsPointer || (IsMemberReference && Next != CE))
7966               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7967                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7968 
7969             if (ShouldBeMemberOf) {
7970               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7971               // should be later updated with the correct value of MEMBER_OF.
7972               Flags |= OMP_MAP_MEMBER_OF;
7973               // From now on, all subsequent PTR_AND_OBJ entries should not be
7974               // marked as MEMBER_OF.
7975               ShouldBeMemberOf = false;
7976             }
7977           }
7978 
7979           CombinedInfo.Types.push_back(Flags);
7980         }
7981 
7982         // If we have encountered a member expression so far, keep track of the
7983         // mapped member. If the parent is "*this", then the value declaration
7984         // is nullptr.
7985         if (EncounteredME) {
7986           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7987           unsigned FieldIndex = FD->getFieldIndex();
7988 
7989           // Update info about the lowest and highest elements for this struct
7990           if (!PartialStruct.Base.isValid()) {
7991             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7992             if (IsFinalArraySection) {
7993               Address HB =
7994                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7995                       .getAddress(CGF);
7996               PartialStruct.HighestElem = {FieldIndex, HB};
7997             } else {
7998               PartialStruct.HighestElem = {FieldIndex, LowestElem};
7999             }
8000             PartialStruct.Base = BP;
8001             PartialStruct.LB = BP;
8002           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8003             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8004           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8005             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8006           }
8007         }
8008 
8009         // Need to emit combined struct for array sections.
8010         if (IsFinalArraySection || IsNonContiguous)
8011           PartialStruct.IsArraySection = true;
8012 
8013         // If we have a final array section, we are done with this expression.
8014         if (IsFinalArraySection)
8015           break;
8016 
8017         // The pointer becomes the base for the next element.
8018         if (Next != CE)
8019           BP = IsMemberReference ? LowestElem : LB;
8020 
8021         IsExpressionFirstInfo = false;
8022         IsCaptureFirstInfo = false;
8023         FirstPointerInComplexData = false;
8024         IsPrevMemberReference = IsMemberReference;
8025       } else if (FirstPointerInComplexData) {
8026         QualType Ty = Components.rbegin()
8027                           ->getAssociatedDeclaration()
8028                           ->getType()
8029                           .getNonReferenceType();
8030         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8031         FirstPointerInComplexData = false;
8032       }
8033     }
8034     // If ran into the whole component - allocate the space for the whole
8035     // record.
8036     if (!EncounteredME)
8037       PartialStruct.HasCompleteRecord = true;
8038 
8039     if (!IsNonContiguous)
8040       return;
8041 
8042     const ASTContext &Context = CGF.getContext();
8043 
8044     // For supporting stride in array section, we need to initialize the first
8045     // dimension size as 1, first offset as 0, and first count as 1
8046     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8047     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8048     MapValuesArrayTy CurStrides;
8049     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8050     uint64_t ElementTypeSize;
8051 
    // Collect Size information for each dimension and get the element size as
    // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
8055     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8056          Components) {
8057       const Expr *AssocExpr = Component.getAssociatedExpression();
8058       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8059 
8060       if (!OASE)
8061         continue;
8062 
8063       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8064       auto *CAT = Context.getAsConstantArrayType(Ty);
8065       auto *VAT = Context.getAsVariableArrayType(Ty);
8066 
8067       // We need all the dimension size except for the last dimension.
8068       assert((VAT || CAT || &Component == &*Components.begin()) &&
8069              "Should be either ConstantArray or VariableArray if not the "
8070              "first Component");
8071 
8072       // Get element size if CurStrides is empty.
8073       if (CurStrides.empty()) {
8074         const Type *ElementType = nullptr;
8075         if (CAT)
8076           ElementType = CAT->getElementType().getTypePtr();
8077         else if (VAT)
8078           ElementType = VAT->getElementType().getTypePtr();
8079         else
8080           assert(&Component == &*Components.begin() &&
8081                  "Only expect pointer (non CAT or VAT) when this is the "
8082                  "first Component");
8083         // If ElementType is null, then it means the base is a pointer
8084         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8085         // for next iteration.
8086         if (ElementType) {
8087           // For the case that having pointer as base, we need to remove one
8088           // level of indirection.
8089           if (&Component != &*Components.begin())
8090             ElementType = ElementType->getPointeeOrArrayElementType();
8091           ElementTypeSize =
8092               Context.getTypeSizeInChars(ElementType).getQuantity();
8093           CurStrides.push_back(
8094               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8095         }
8096       }
8097       // Get dimension value except for the last dimension since we don't need
8098       // it.
8099       if (DimSizes.size() < Components.size() - 1) {
8100         if (CAT)
8101           DimSizes.push_back(llvm::ConstantInt::get(
8102               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8103         else if (VAT)
8104           DimSizes.push_back(CGF.Builder.CreateIntCast(
8105               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8106               /*IsSigned=*/false));
8107       }
8108     }
8109 
    // Skip the dummy dimension since we already have its information.
8111     auto DI = DimSizes.begin() + 1;
8112     // Product of dimension.
8113     llvm::Value *DimProd =
8114         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8115 
8116     // Collect info for non-contiguous. Notice that offset, count, and stride
8117     // are only meaningful for array-section, so we insert a null for anything
8118     // other than array-section.
8119     // Also, the size of offset, count, and stride are not the same as
8120     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8121     // count, and stride are the same as the number of non-contiguous
8122     // declaration in target update to/from clause.
8123     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8124          Components) {
8125       const Expr *AssocExpr = Component.getAssociatedExpression();
8126 
8127       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8128         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8129             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8130             /*isSigned=*/false);
8131         CurOffsets.push_back(Offset);
8132         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8133         CurStrides.push_back(CurStrides.back());
8134         continue;
8135       }
8136 
8137       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8138 
8139       if (!OASE)
8140         continue;
8141 
8142       // Offset
8143       const Expr *OffsetExpr = OASE->getLowerBound();
8144       llvm::Value *Offset = nullptr;
8145       if (!OffsetExpr) {
8146         // If offset is absent, then we just set it to zero.
8147         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8148       } else {
8149         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8150                                            CGF.Int64Ty,
8151                                            /*isSigned=*/false);
8152       }
8153       CurOffsets.push_back(Offset);
8154 
8155       // Count
8156       const Expr *CountExpr = OASE->getLength();
8157       llvm::Value *Count = nullptr;
8158       if (!CountExpr) {
8159         // In Clang, once a high dimension is an array section, we construct all
8160         // the lower dimension as array section, however, for case like
8161         // arr[0:2][2], Clang construct the inner dimension as an array section
8162         // but it actually is not in an array section form according to spec.
8163         if (!OASE->getColonLocFirst().isValid() &&
8164             !OASE->getColonLocSecond().isValid()) {
8165           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8166         } else {
8167           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8168           // When the length is absent it defaults to ⌈(size −
8169           // lower-bound)/stride⌉, where size is the size of the array
8170           // dimension.
8171           const Expr *StrideExpr = OASE->getStride();
8172           llvm::Value *Stride =
8173               StrideExpr
8174                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8175                                               CGF.Int64Ty, /*isSigned=*/false)
8176                   : nullptr;
8177           if (Stride)
8178             Count = CGF.Builder.CreateUDiv(
8179                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8180           else
8181             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8182         }
8183       } else {
8184         Count = CGF.EmitScalarExpr(CountExpr);
8185       }
8186       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8187       CurCounts.push_back(Count);
8188 
8189       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8190       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8191       //              Offset      Count     Stride
8192       //    D0          0           1         4    (int)    <- dummy dimension
8193       //    D1          0           2         8    (2 * (1) * 4)
8194       //    D2          1           2         20   (1 * (1 * 5) * 4)
8195       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8196       const Expr *StrideExpr = OASE->getStride();
8197       llvm::Value *Stride =
8198           StrideExpr
8199               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8200                                           CGF.Int64Ty, /*isSigned=*/false)
8201               : nullptr;
8202       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8203       if (Stride)
8204         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8205       else
8206         CurStrides.push_back(DimProd);
8207       if (DI != DimSizes.end())
8208         ++DI;
8209     }
8210 
8211     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8212     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8213     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8214   }
8215 
8216   /// Return the adjusted map modifiers if the declaration a capture refers to
8217   /// appears in a first-private clause. This is expected to be used only with
8218   /// directives that start with 'target'.
8219   MappableExprsHandler::OpenMPOffloadMappingFlags
8220   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8221     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8222 
8223     // A first private variable captured by reference will use only the
8224     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8225     // declaration is known as first-private in this handler.
8226     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8227       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8228           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8229         return MappableExprsHandler::OMP_MAP_ALWAYS |
8230                MappableExprsHandler::OMP_MAP_TO;
8231       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8232         return MappableExprsHandler::OMP_MAP_TO |
8233                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8234       return MappableExprsHandler::OMP_MAP_PRIVATE |
8235              MappableExprsHandler::OMP_MAP_TO;
8236     }
8237     return MappableExprsHandler::OMP_MAP_TO |
8238            MappableExprsHandler::OMP_MAP_FROM;
8239   }
8240 
8241   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8242     // Rotate by getFlagMemberOffset() bits.
8243     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8244                                                   << getFlagMemberOffset());
8245   }
8246 
8247   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8248                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8249     // If the entry is PTR_AND_OBJ but has not been marked with the special
8250     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8251     // marked as MEMBER_OF.
8252     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8253         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8254       return;
8255 
8256     // Reset the placeholder value to prepare the flag for the assignment of the
8257     // proper MEMBER_OF value.
8258     Flags &= ~OMP_MAP_MEMBER_OF;
8259     Flags |= MemberOfFlag;
8260   }
8261 
8262   void getPlainLayout(const CXXRecordDecl *RD,
8263                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8264                       bool AsBase) const {
8265     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8266 
8267     llvm::StructType *St =
8268         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8269 
8270     unsigned NumElements = St->getNumElements();
8271     llvm::SmallVector<
8272         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8273         RecordLayout(NumElements);
8274 
8275     // Fill bases.
8276     for (const auto &I : RD->bases()) {
8277       if (I.isVirtual())
8278         continue;
8279       const auto *Base = I.getType()->getAsCXXRecordDecl();
8280       // Ignore empty bases.
8281       if (Base->isEmpty() || CGF.getContext()
8282                                  .getASTRecordLayout(Base)
8283                                  .getNonVirtualSize()
8284                                  .isZero())
8285         continue;
8286 
8287       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8288       RecordLayout[FieldIndex] = Base;
8289     }
8290     // Fill in virtual bases.
8291     for (const auto &I : RD->vbases()) {
8292       const auto *Base = I.getType()->getAsCXXRecordDecl();
8293       // Ignore empty bases.
8294       if (Base->isEmpty())
8295         continue;
8296       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8297       if (RecordLayout[FieldIndex])
8298         continue;
8299       RecordLayout[FieldIndex] = Base;
8300     }
8301     // Fill in all the fields.
8302     assert(!RD->isUnion() && "Unexpected union.");
8303     for (const auto *Field : RD->fields()) {
8304       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8305       // will fill in later.)
8306       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8307         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8308         RecordLayout[FieldIndex] = Field;
8309       }
8310     }
8311     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8312              &Data : RecordLayout) {
8313       if (Data.isNull())
8314         continue;
8315       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8316         getPlainLayout(Base, Layout, /*AsBase=*/true);
8317       else
8318         Layout.push_back(Data.get<const FieldDecl *>());
8319     }
8320   }
8321 
8322   /// Generate all the base pointers, section pointers, sizes, map types, and
8323   /// mappers for the extracted mappable expressions (all included in \a
8324   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8325   /// pair of the relevant declaration and index where it occurs is appended to
8326   /// the device pointers info array.
8327   void generateAllInfoForClauses(
8328       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8329       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8330           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8331     // We have to process the component lists that relate with the same
8332     // declaration in a single chunk so that we can generate the map flags
8333     // correctly. Therefore, we organize all lists in a map.
8334     enum MapKind { Present, Allocs, Other, Total };
8335     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8336                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8337         Info;
8338 
8339     // Helper function to fill the information map for the different supported
8340     // clauses.
8341     auto &&InfoGen =
8342         [&Info, &SkipVarSet](
8343             const ValueDecl *D, MapKind Kind,
8344             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8345             OpenMPMapClauseKind MapType,
8346             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8347             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8348             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8349             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8350           if (SkipVarSet.contains(D))
8351             return;
8352           auto It = Info.find(D);
8353           if (It == Info.end())
8354             It = Info
8355                      .insert(std::make_pair(
8356                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8357                      .first;
8358           It->second[Kind].emplace_back(
8359               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8360               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8361         };
8362 
8363     for (const auto *Cl : Clauses) {
8364       const auto *C = dyn_cast<OMPMapClause>(Cl);
8365       if (!C)
8366         continue;
8367       MapKind Kind = Other;
8368       if (!C->getMapTypeModifiers().empty() &&
8369           llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
8370             return K == OMPC_MAP_MODIFIER_present;
8371           }))
8372         Kind = Present;
8373       else if (C->getMapType() == OMPC_MAP_alloc)
8374         Kind = Allocs;
8375       const auto *EI = C->getVarRefs().begin();
8376       for (const auto L : C->component_lists()) {
8377         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8378         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8379                 C->getMapTypeModifiers(), llvm::None,
8380                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8381                 E);
8382         ++EI;
8383       }
8384     }
8385     for (const auto *Cl : Clauses) {
8386       const auto *C = dyn_cast<OMPToClause>(Cl);
8387       if (!C)
8388         continue;
8389       MapKind Kind = Other;
8390       if (!C->getMotionModifiers().empty() &&
8391           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8392             return K == OMPC_MOTION_MODIFIER_present;
8393           }))
8394         Kind = Present;
8395       const auto *EI = C->getVarRefs().begin();
8396       for (const auto L : C->component_lists()) {
8397         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8398                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8399                 C->isImplicit(), std::get<2>(L), *EI);
8400         ++EI;
8401       }
8402     }
8403     for (const auto *Cl : Clauses) {
8404       const auto *C = dyn_cast<OMPFromClause>(Cl);
8405       if (!C)
8406         continue;
8407       MapKind Kind = Other;
8408       if (!C->getMotionModifiers().empty() &&
8409           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8410             return K == OMPC_MOTION_MODIFIER_present;
8411           }))
8412         Kind = Present;
8413       const auto *EI = C->getVarRefs().begin();
8414       for (const auto L : C->component_lists()) {
8415         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8416                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8417                 C->isImplicit(), std::get<2>(L), *EI);
8418         ++EI;
8419       }
8420     }
8421 
8422     // Look at the use_device_ptr clause information and mark the existing map
8423     // entries as such. If there is no map information for an entry in the
8424     // use_device_ptr list, we create one with map type 'alloc' and zero size
8425     // section. It is the user fault if that was not mapped before. If there is
8426     // no map information and the pointer is a struct member, then we defer the
8427     // emission of that entry until the whole struct has been processed.
8428     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8429                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8430         DeferredInfo;
8431     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8432 
8433     for (const auto *Cl : Clauses) {
8434       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8435       if (!C)
8436         continue;
8437       for (const auto L : C->component_lists()) {
8438         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8439             std::get<1>(L);
8440         assert(!Components.empty() &&
8441                "Not expecting empty list of components!");
8442         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8443         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8444         const Expr *IE = Components.back().getAssociatedExpression();
8445         // If the first component is a member expression, we have to look into
8446         // 'this', which maps to null in the map of map information. Otherwise
8447         // look directly for the information.
8448         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8449 
8450         // We potentially have map information for this declaration already.
8451         // Look for the first set of components that refer to it.
8452         if (It != Info.end()) {
8453           bool Found = false;
8454           for (auto &Data : It->second) {
8455             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8456               return MI.Components.back().getAssociatedDeclaration() == VD;
8457             });
8458             // If we found a map entry, signal that the pointer has to be
8459             // returned and move on to the next declaration. Exclude cases where
8460             // the base pointer is mapped as array subscript, array section or
8461             // array shaping. The base address is passed as a pointer to base in
8462             // this case and cannot be used as a base for use_device_ptr list
8463             // item.
8464             if (CI != Data.end()) {
8465               auto PrevCI = std::next(CI->Components.rbegin());
8466               const auto *VarD = dyn_cast<VarDecl>(VD);
8467               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8468                   isa<MemberExpr>(IE) ||
8469                   !VD->getType().getNonReferenceType()->isPointerType() ||
8470                   PrevCI == CI->Components.rend() ||
8471                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8472                   VarD->hasLocalStorage()) {
8473                 CI->ReturnDevicePointer = true;
8474                 Found = true;
8475                 break;
8476               }
8477             }
8478           }
8479           if (Found)
8480             continue;
8481         }
8482 
8483         // We didn't find any match in our map information - generate a zero
8484         // size array section - if the pointer is a struct member we defer this
8485         // action until the whole struct has been processed.
8486         if (isa<MemberExpr>(IE)) {
8487           // Insert the pointer into Info to be processed by
8488           // generateInfoForComponentList. Because it is a member pointer
8489           // without a pointee, no entry will be generated for it, therefore
8490           // we need to generate one after the whole struct has been processed.
8491           // Nonetheless, generateInfoForComponentList must be called to take
8492           // the pointer into account for the calculation of the range of the
8493           // partial struct.
8494           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8495                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8496                   nullptr);
8497           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8498         } else {
8499           llvm::Value *Ptr =
8500               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8501           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8502           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8503           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8504           UseDevicePtrCombinedInfo.Sizes.push_back(
8505               llvm::Constant::getNullValue(CGF.Int64Ty));
8506           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8507           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8508         }
8509       }
8510     }
8511 
8512     // Look at the use_device_addr clause information and mark the existing map
8513     // entries as such. If there is no map information for an entry in the
8514     // use_device_addr list, we create one with map type 'alloc' and zero size
8515     // section. It is the user fault if that was not mapped before. If there is
8516     // no map information and the pointer is a struct member, then we defer the
8517     // emission of that entry until the whole struct has been processed.
8518     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8519     for (const auto *Cl : Clauses) {
8520       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8521       if (!C)
8522         continue;
8523       for (const auto L : C->component_lists()) {
8524         assert(!std::get<1>(L).empty() &&
8525                "Not expecting empty list of components!");
8526         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8527         if (!Processed.insert(VD).second)
8528           continue;
8529         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8530         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8531         // If the first component is a member expression, we have to look into
8532         // 'this', which maps to null in the map of map information. Otherwise
8533         // look directly for the information.
8534         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8535 
8536         // We potentially have map information for this declaration already.
8537         // Look for the first set of components that refer to it.
8538         if (It != Info.end()) {
8539           bool Found = false;
8540           for (auto &Data : It->second) {
8541             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8542               return MI.Components.back().getAssociatedDeclaration() == VD;
8543             });
8544             // If we found a map entry, signal that the pointer has to be
8545             // returned and move on to the next declaration.
8546             if (CI != Data.end()) {
8547               CI->ReturnDevicePointer = true;
8548               Found = true;
8549               break;
8550             }
8551           }
8552           if (Found)
8553             continue;
8554         }
8555 
8556         // We didn't find any match in our map information - generate a zero
8557         // size array section - if the pointer is a struct member we defer this
8558         // action until the whole struct has been processed.
8559         if (isa<MemberExpr>(IE)) {
8560           // Insert the pointer into Info to be processed by
8561           // generateInfoForComponentList. Because it is a member pointer
8562           // without a pointee, no entry will be generated for it, therefore
8563           // we need to generate one after the whole struct has been processed.
8564           // Nonetheless, generateInfoForComponentList must be called to take
8565           // the pointer into account for the calculation of the range of the
8566           // partial struct.
8567           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8568                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8569                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8570           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8571         } else {
8572           llvm::Value *Ptr;
8573           if (IE->isGLValue())
8574             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8575           else
8576             Ptr = CGF.EmitScalarExpr(IE);
8577           CombinedInfo.Exprs.push_back(VD);
8578           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8579           CombinedInfo.Pointers.push_back(Ptr);
8580           CombinedInfo.Sizes.push_back(
8581               llvm::Constant::getNullValue(CGF.Int64Ty));
8582           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8583           CombinedInfo.Mappers.push_back(nullptr);
8584         }
8585       }
8586     }
8587 
8588     for (const auto &Data : Info) {
8589       StructRangeInfoTy PartialStruct;
8590       // Temporary generated information.
8591       MapCombinedInfoTy CurInfo;
8592       const Decl *D = Data.first;
8593       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8594       for (const auto &M : Data.second) {
8595         for (const MapInfo &L : M) {
8596           assert(!L.Components.empty() &&
8597                  "Not expecting declaration with no component lists.");
8598 
8599           // Remember the current base pointer index.
8600           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8601           CurInfo.NonContigInfo.IsNonContiguous =
8602               L.Components.back().isNonContiguous();
8603           generateInfoForComponentList(
8604               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8605               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8606               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8607 
8608           // If this entry relates with a device pointer, set the relevant
8609           // declaration and add the 'return pointer' flag.
8610           if (L.ReturnDevicePointer) {
8611             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8612                    "Unexpected number of mapped base pointers.");
8613 
8614             const ValueDecl *RelevantVD =
8615                 L.Components.back().getAssociatedDeclaration();
8616             assert(RelevantVD &&
8617                    "No relevant declaration related with device pointer??");
8618 
8619             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8620                 RelevantVD);
8621             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8622           }
8623         }
8624       }
8625 
8626       // Append any pending zero-length pointers which are struct members and
8627       // used with use_device_ptr or use_device_addr.
8628       auto CI = DeferredInfo.find(Data.first);
8629       if (CI != DeferredInfo.end()) {
8630         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8631           llvm::Value *BasePtr;
8632           llvm::Value *Ptr;
8633           if (L.ForDeviceAddr) {
8634             if (L.IE->isGLValue())
8635               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8636             else
8637               Ptr = this->CGF.EmitScalarExpr(L.IE);
8638             BasePtr = Ptr;
8639             // Entry is RETURN_PARAM. Also, set the placeholder value
8640             // MEMBER_OF=FFFF so that the entry is later updated with the
8641             // correct value of MEMBER_OF.
8642             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8643           } else {
8644             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8645             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8646                                              L.IE->getExprLoc());
8647             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8648             // placeholder value MEMBER_OF=FFFF so that the entry is later
8649             // updated with the correct value of MEMBER_OF.
8650             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8651                                     OMP_MAP_MEMBER_OF);
8652           }
8653           CurInfo.Exprs.push_back(L.VD);
8654           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8655           CurInfo.Pointers.push_back(Ptr);
8656           CurInfo.Sizes.push_back(
8657               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8658           CurInfo.Mappers.push_back(nullptr);
8659         }
8660       }
8661       // If there is an entry in PartialStruct it means we have a struct with
8662       // individual members mapped. Emit an extra combined entry.
8663       if (PartialStruct.Base.isValid()) {
8664         CurInfo.NonContigInfo.Dims.push_back(0);
8665         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8666       }
8667 
8668       // We need to append the results of this capture to what we already
8669       // have.
8670       CombinedInfo.append(CurInfo);
8671     }
8672     // Append data for use_device_ptr clauses.
8673     CombinedInfo.append(UseDevicePtrCombinedInfo);
8674   }
8675 
8676 public:
8677   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8678       : CurDir(&Dir), CGF(CGF) {
8679     // Extract firstprivate clause information.
8680     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8681       for (const auto *D : C->varlists())
8682         FirstPrivateDecls.try_emplace(
8683             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8684     // Extract implicit firstprivates from uses_allocators clauses.
8685     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8686       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8687         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8688         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8689           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8690                                         /*Implicit=*/true);
8691         else if (const auto *VD = dyn_cast<VarDecl>(
8692                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8693                          ->getDecl()))
8694           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8695       }
8696     }
8697     // Extract device pointer clause information.
8698     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8699       for (auto L : C->component_lists())
8700         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8701   }
8702 
  /// Constructor for the declare mapper directive. Only stores the directive
  /// and the CodeGenFunction; no clause information is extracted here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8706 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// \param CombinedInfo Output arrays that receive the single combined entry.
  /// \param CurTypes Map flags of the already-emitted member entries; they are
  ///        rewritten in place to become MEMBER_OF the combined entry.
  /// \param PartialStruct Base address and lowest/highest mapped elements of
  ///        the struct.
  /// \param VD The mapped declaration (may be null).
  /// \param NotTargetParams If false, the combined entry is flagged as a
  ///        TARGET_PARAM (i.e. it is a kernel argument).
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is neither a struct member (no MEMBER_OF
    // placeholder) nor an array section needs no combined parent entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      // The whole record is mapped: both bounds collapse to the record base,
      // so the (HB+1)-LB computation below yields the size of one record.
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // The combined entry is a TARGET_PARAM only when we generate info for
    // captures (NotTargetParams == false); otherwise it starts with no flags.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element: the combined entry now
    // carries that role (when requested).
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
8763 
8764   /// Generate all the base pointers, section pointers, sizes, map types, and
8765   /// mappers for the extracted mappable expressions (all included in \a
8766   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8767   /// pair of the relevant declaration and index where it occurs is appended to
8768   /// the device pointers info array.
8769   void generateAllInfo(
8770       MapCombinedInfoTy &CombinedInfo,
8771       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8772           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8773     assert(CurDir.is<const OMPExecutableDirective *>() &&
8774            "Expect a executable directive");
8775     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8776     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8777   }
8778 
8779   /// Generate all the base pointers, section pointers, sizes, map types, and
8780   /// mappers for the extracted map clauses of user-defined mapper (all included
8781   /// in \a CombinedInfo).
8782   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8783     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8784            "Expect a declare mapper directive");
8785     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8786     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8787   }
8788 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// \param VD Declaration of the captured lambda object.
  /// \param Arg Pointer to the lambda object as passed to the region.
  /// \param CombinedInfo Output arrays receiving one entry per by-reference
  ///        capture (plus one for a captured 'this', if any).
  /// \param LambdaPointers Map from each emitted field address to the lambda
  ///        base address, consumed later by adjustMemberOfForLambdaCaptures.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    // Nothing to do unless the captured variable is a lambda closure object.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // Map a captured 'this' as a PTR_AND_OBJ member of the lambda object.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      // Only variable captures are of interest here.
      if (!LC.capturesVariable())
        continue;
      // NOTE: shadows the outer VD (the lambda object) with the captured var.
      const VarDecl *VD = LC.getCapturedVar();
      // Skip by-copy captures of non-pointer values.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced storage with its real size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy pointer capture: the loaded pointer value with zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
8855 
8856   /// Set correct indices for lambdas captures.
8857   void adjustMemberOfForLambdaCaptures(
8858       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8859       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8860       MapFlagsArrayTy &Types) const {
8861     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8862       // Set correct member_of idx for all implicit lambda captures.
8863       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8864                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8865         continue;
8866       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8867       assert(BasePtr && "Unable to find base lambda address.");
8868       int TgtIdx = -1;
8869       for (unsigned J = I; J > 0; --J) {
8870         unsigned Idx = J - 1;
8871         if (Pointers[Idx] != BasePtr)
8872           continue;
8873         TgtIdx = Idx;
8874         break;
8875       }
8876       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8877       // All other current entries will be MEMBER_OF the combined entry
8878       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8879       // 0xFFFF in the MEMBER_OF field).
8880       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8881       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8882     }
8883   }
8884 
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  ///
  /// \param Cap The capture to generate map information for.
  /// \param Arg The value of the captured variable as passed to the region.
  /// \param CombinedInfo Output arrays that receive the generated entries.
  /// \param PartialStruct Filled in when individual members of a struct are
  ///        mapped; the caller is then expected to emit the combined entry.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // Collect every map-clause component list that references this capture's
    // declaration, along with its map type, modifiers, mapper and origin expr.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Order the lists so that entries with the 'present' map modifier come
    // first and 'alloc' map types last. NOTE(review): HasAllocs is computed
    // from RHS while HasAllocsR is computed from LHS; together with the final
    // comparison this sinks 'alloc' entries to the end — looks intentional,
    // but the naming is worth confirming.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent = !MapModifiers.empty() &&
                        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                          return K == clang::OMPC_MAP_MODIFIER_present;
                        });
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          !MapModifiers.empty() &&
          llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
            return K == clang::OMPC_MAP_MODIFIER_present;
          });
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      // Compare this list against every later list; walk both component lists
      // in reverse (from the base declaration outwards).
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              std::prev(It)
                  ->getAssociatedExpression()
                  ->getType()
                  ->isPointerType())
            continue;
          // The shorter (base) list owns the overlap; the longer list is the
          // overlapping sub-component.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointer/array layers to reach the underlying record type, then
      // capture its field layout for ordering comparisons below.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      // Order the overlapped component lists by field declaration order.
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // Fields from different (nested) records: whichever appears first
            // in the plain layout comes first.
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }
9116 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Emits exactly one entry in \a CombinedInfo, flagged TARGET_PARAM (and
  /// IMPLICIT unless an explicit firstprivate clause named the variable).
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      // The capture field for 'this' has pointer type; size is the pointee.
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // An explicit firstprivate clause overrides the implicit flag below.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // A constant firstprivate is proxied by a registered global copy,
        // initialized here from the original variable; the global then serves
        // as both base and begin pointer.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(Addr);
        CombinedInfo.Pointers.push_back(Addr);
      } else {
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer: the begin pointer is the loaded pointee
          // address rather than the reference itself.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
        } else {
          CombinedInfo.Pointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9204 };
9205 } // anonymous namespace
9206 
/// Emit the per-declaration arrays of 'descriptor_dim' records
/// (offset/count/stride per dimension) used for non-contiguous transfers,
/// and store a pointer to each array into the corresponding slot of the
/// offload pointers array in \p Info.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field indices within descriptor_dim.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    // Fill one descriptor_dim per dimension. RevIdx walks the stored
    // offset/count/stride vectors in reverse relative to the emitted order.
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}
9274 
9275 /// Emit a string constant containing the names of the values mapped to the
9276 /// offloading runtime library.
9277 llvm::Constant *
9278 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9279                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9280   llvm::Constant *SrcLocStr;
9281   if (!MapExprs.getMapDecl()) {
9282     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
9283   } else {
9284     std::string ExprName = "";
9285     if (MapExprs.getMapExpr()) {
9286       PrintingPolicy P(CGF.getContext().getLangOpts());
9287       llvm::raw_string_ostream OS(ExprName);
9288       MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9289       OS.flush();
9290     } else {
9291       ExprName = MapExprs.getMapDecl()->getNameAsString();
9292     }
9293 
9294     SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
9295     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9296     const char *FileName = PLoc.getFilename();
9297     unsigned Line = PLoc.getLine();
9298     unsigned Column = PLoc.getColumn();
9299     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
9300                                                 Line, Column);
9301   }
9302   return SrcLocStr;
9303 }
9304 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Populates Info with: stack arrays for base pointers, pointers, and
/// mappers; a sizes array that is a constant global when every size is a
/// compile-time constant, or a runtime-filled stack array otherwise; a
/// constant map-types array (plus a separate end-of-region variant when a
/// 'present' modifier must be stripped); and, under debug info, a map-names
/// array.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Stack-allocate the base-pointer, pointer, and mapper arrays; they are
    // filled per-entry in the loop below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      // At least one size is only known at runtime: use a mutable stack
      // array, filled in the per-entry loop below.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          // For non-contiguous entries the size slot carries the dimension
          // count rather than a byte size (see emitNonContiguousDescriptor).
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      // Emit the sizes once as a private, unnamed_addr constant global.
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      // No debug info: pass a null i8* so the runtime sees no names.
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // Build one descriptive string per mapped expression (see
      // emitMappingInformation) and emit them as a names array.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          // Strip the PRESENT bit; Mapping now describes the end-call types.
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill one slot per capture in the base-pointer, pointer, (runtime-sized)
    // size, and mapper arrays.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where this declaration's base pointer was stored so the
      // translated device address can be read back later (device-pointer
      // captures only).
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        // Store this entry's size, widened (signed) to i64, into the mutable
        // sizes array.
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        // This entry has a user-defined mapper; emit (or reuse) its function
        // and remember that at least one mapper exists.
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // Emit per-dimension descriptors only when non-contiguous maps are both
  // requested and actually present.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9479 
namespace {
/// Extra knobs for emitOffloadingArraysArgument.
struct ArgumentsOptions {
  /// When true, the map-type array emitted for the end of the region is
  /// selected instead of the one for the beginning.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool IsForEndCall) : ForEndCall(IsForEndCall) {}
};
} // namespace
9488 
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers.  If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
///
/// With no pointers to pass (Info.NumberOfPtrs == 0), every out-parameter is
/// set to a typed null pointer instead.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each array to a pointer to its first element (&array[0][0]).
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // For the end of the region prefer the PRESENT-stripped map-type array,
    // when one was emitted (see emitOffloadingArrays).
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the mapper information arrays if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // Nothing to map: pass typed nulls for all six runtime arguments.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}
9549 
/// Check for inner distribute directive.
///
/// Given a target-style directive \p D, return the distribute directive
/// nested directly inside it (for 'target', also looking through one level
/// of an inner 'teams'), or nullptr if there is none. Only target-family
/// outer directives are expected; anything else is unreachable.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  // Look at the single statement inside the directive's captured body.
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may contain a distribute directly ...
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      // ... or a 'teams' whose single child is a distribute.
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      // 'target teams' may directly contain a distribute.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These target variants cannot legally contain a nested distribute.
      return nullptr;
    // All remaining kinds either already combine a distribute or are not
    // valid outer directives for this query; reaching them is a bug in the
    // caller.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9658 
9659 /// Emit the user-defined mapper function. The code generation follows the
9660 /// pattern in the example below.
9661 /// \code
9662 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9663 ///                                           void *base, void *begin,
9664 ///                                           int64_t size, int64_t type,
9665 ///                                           void *name = nullptr) {
9666 ///   // Allocate space for an array section first or add a base/begin for
9667 ///   // pointer dereference.
9668 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9669 ///       !maptype.IsDelete)
9670 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9671 ///                                 size*sizeof(Ty), clearToFromMember(type));
9672 ///   // Map members.
9673 ///   for (unsigned i = 0; i < size; i++) {
9674 ///     // For each component specified by this mapper:
9675 ///     for (auto c : begin[i]->all_components) {
9676 ///       if (c.hasMapper())
9677 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9678 ///                       c.arg_type, c.arg_name);
9679 ///       else
9680 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9681 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9682 ///                                     c.arg_name);
9683 ///     }
9684 ///   }
9685 ///   // Delete the array section.
9686 ///   if (size > 1 && maptype.IsDelete)
9687 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9688 ///                                 size*sizeof(Ty), clearToFromMember(type));
9689 /// }
9690 /// \endcode
9691 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9692                                             CodeGenFunction *CGF) {
9693   if (UDMMap.count(D) > 0)
9694     return;
9695   ASTContext &C = CGM.getContext();
9696   QualType Ty = D->getType();
9697   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9698   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9699   auto *MapperVarDecl =
9700       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9701   SourceLocation Loc = D->getLocation();
9702   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9703 
9704   // Prepare mapper function arguments and attributes.
9705   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9706                               C.VoidPtrTy, ImplicitParamDecl::Other);
9707   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9708                             ImplicitParamDecl::Other);
9709   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9710                              C.VoidPtrTy, ImplicitParamDecl::Other);
9711   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9712                             ImplicitParamDecl::Other);
9713   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9714                             ImplicitParamDecl::Other);
9715   ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9716                             ImplicitParamDecl::Other);
9717   FunctionArgList Args;
9718   Args.push_back(&HandleArg);
9719   Args.push_back(&BaseArg);
9720   Args.push_back(&BeginArg);
9721   Args.push_back(&SizeArg);
9722   Args.push_back(&TypeArg);
9723   Args.push_back(&NameArg);
9724   const CGFunctionInfo &FnInfo =
9725       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9726   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9727   SmallString<64> TyStr;
9728   llvm::raw_svector_ostream Out(TyStr);
9729   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9730   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9731   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9732                                     Name, &CGM.getModule());
9733   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9734   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9735   // Start the mapper function code generation.
9736   CodeGenFunction MapperCGF(CGM);
9737   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9738   // Compute the starting and end addresses of array elements.
9739   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9740       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9741       C.getPointerType(Int64Ty), Loc);
9742   // Prepare common arguments for array initiation and deletion.
9743   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9744       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9745       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9746   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9747       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9748       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9749   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9750       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9751       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9752   // Convert the size in bytes into the number of array elements.
9753   Size = MapperCGF.Builder.CreateExactUDiv(
9754       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9755   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9756       BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9757   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
9758   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9759       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9760       C.getPointerType(Int64Ty), Loc);
9761   llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9762       MapperCGF.GetAddrOfLocalVar(&NameArg),
9763       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9764 
9765   // Emit array initiation if this is an array section and \p MapType indicates
9766   // that memory allocation is required.
9767   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9768   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9769                              MapName, ElementSize, HeadBB, /*IsInit=*/true);
9770 
9771   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9772 
9773   // Emit the loop header block.
9774   MapperCGF.EmitBlock(HeadBB);
9775   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9776   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9777   // Evaluate whether the initial condition is satisfied.
9778   llvm::Value *IsEmpty =
9779       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9780   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9781   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9782 
9783   // Emit the loop body block.
9784   MapperCGF.EmitBlock(BodyBB);
9785   llvm::BasicBlock *LastBB = BodyBB;
9786   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9787       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9788   PtrPHI->addIncoming(PtrBegin, EntryBB);
9789   Address PtrCurrent =
9790       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9791                           .getAlignment()
9792                           .alignmentOfArrayElement(ElementSize));
9793   // Privatize the declared variable of mapper to be the current array element.
9794   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9795   Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
9796   (void)Scope.Privatize();
9797 
9798   // Get map clause information. Fill up the arrays with all mapped variables.
9799   MappableExprsHandler::MapCombinedInfoTy Info;
9800   MappableExprsHandler MEHandler(*D, MapperCGF);
9801   MEHandler.generateAllInfoForMapper(Info);
9802 
9803   // Call the runtime API __tgt_mapper_num_components to get the number of
9804   // pre-existing components.
9805   llvm::Value *OffloadingArgs[] = {Handle};
9806   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9807       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9808                                             OMPRTL___tgt_mapper_num_components),
9809       OffloadingArgs);
9810   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9811       PreviousSize,
9812       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9813 
9814   // Fill up the runtime mapper handle for all components.
9815   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9816     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9817         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9818     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9819         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9820     llvm::Value *CurSizeArg = Info.Sizes[I];
9821     llvm::Value *CurNameArg =
9822         (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9823             ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9824             : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9825 
9826     // Extract the MEMBER_OF field from the map type.
9827     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
9828     llvm::Value *MemberMapType =
9829         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9830 
9831     // Combine the map type inherited from user-defined mapper with that
9832     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9833     // bits of the \a MapType, which is the input argument of the mapper
9834     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9835     // bits of MemberMapType.
9836     // [OpenMP 5.0], 1.2.6. map-type decay.
9837     //        | alloc |  to   | from  | tofrom | release | delete
9838     // ----------------------------------------------------------
9839     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9840     // to     | alloc |  to   | alloc |   to   | release | delete
9841     // from   | alloc | alloc | from  |  from  | release | delete
9842     // tofrom | alloc |  to   | from  | tofrom | release | delete
9843     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9844         MapType,
9845         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9846                                    MappableExprsHandler::OMP_MAP_FROM));
9847     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9848     llvm::BasicBlock *AllocElseBB =
9849         MapperCGF.createBasicBlock("omp.type.alloc.else");
9850     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9851     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9852     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9853     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9854     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9855     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9856     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9857     MapperCGF.EmitBlock(AllocBB);
9858     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9859         MemberMapType,
9860         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9861                                      MappableExprsHandler::OMP_MAP_FROM)));
9862     MapperCGF.Builder.CreateBr(EndBB);
9863     MapperCGF.EmitBlock(AllocElseBB);
9864     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9865         LeftToFrom,
9866         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9867     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9868     // In case of to, clear OMP_MAP_FROM.
9869     MapperCGF.EmitBlock(ToBB);
9870     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9871         MemberMapType,
9872         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9873     MapperCGF.Builder.CreateBr(EndBB);
9874     MapperCGF.EmitBlock(ToElseBB);
9875     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9876         LeftToFrom,
9877         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9878     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9879     // In case of from, clear OMP_MAP_TO.
9880     MapperCGF.EmitBlock(FromBB);
9881     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9882         MemberMapType,
9883         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9884     // In case of tofrom, do nothing.
9885     MapperCGF.EmitBlock(EndBB);
9886     LastBB = EndBB;
9887     llvm::PHINode *CurMapType =
9888         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9889     CurMapType->addIncoming(AllocMapType, AllocBB);
9890     CurMapType->addIncoming(ToMapType, ToBB);
9891     CurMapType->addIncoming(FromMapType, FromBB);
9892     CurMapType->addIncoming(MemberMapType, ToElseBB);
9893 
9894     llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
9895                                      CurSizeArg, CurMapType, CurNameArg};
9896     if (Info.Mappers[I]) {
9897       // Call the corresponding mapper function.
9898       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9899           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9900       assert(MapperFunc && "Expect a valid mapper function is available.");
9901       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9902     } else {
9903       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9904       // data structure.
9905       MapperCGF.EmitRuntimeCall(
9906           OMPBuilder.getOrCreateRuntimeFunction(
9907               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9908           OffloadingArgs);
9909     }
9910   }
9911 
9912   // Update the pointer to point to the next element that needs to be mapped,
9913   // and check whether we have mapped all elements.
9914   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9915       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9916   PtrPHI->addIncoming(PtrNext, LastBB);
9917   llvm::Value *IsDone =
9918       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9919   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9920   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9921 
9922   MapperCGF.EmitBlock(ExitBB);
9923   // Emit array deletion if this is an array section and \p MapType indicates
9924   // that deletion is required.
9925   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9926                              MapName, ElementSize, DoneBB, /*IsInit=*/false);
9927 
9928   // Emit the function exit block.
9929   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9930   MapperCGF.FinishFunction();
9931   UDMMap.try_emplace(D, Fn);
9932   if (CGF) {
9933     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9934     Decls.second.push_back(D);
9935   }
9936 }
9937 
9938 /// Emit the array initialization or deletion portion for user-defined mapper
9939 /// code generation. First, it evaluates whether an array section is mapped and
9940 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9941 /// true, and \a MapType indicates to not delete this array, array
9942 /// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  // Distinguish the init and delete flavors in the generated block/value
  // names only; the control-flow shape is shared.
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // An array section is assumed whenever more than one element is mapped.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    // Initialization also applies to a PTR_AND_OBJ entry whose base differs
    // from its begin pointer, even if it is not an array section.
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    // Initialization code runs only when the DELETE bit is clear.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    // Deletion code runs only when the DELETE bit is set.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
10006 
10007 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10008     const OMPDeclareMapperDecl *D) {
10009   auto I = UDMMap.find(D);
10010   if (I != UDMMap.end())
10011     return I->second;
10012   emitUserDefinedMapper(D);
10013   return UDMMap.lookup(D);
10014 }
10015 
10016 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10017     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10018     llvm::Value *DeviceID,
10019     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10020                                      const OMPLoopDirective &D)>
10021         SizeEmitter) {
10022   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10023   const OMPExecutableDirective *TD = &D;
10024   // Get nested teams distribute kind directive, if any.
10025   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10026     TD = getNestedDistributeDirective(CGM.getContext(), D);
10027   if (!TD)
10028     return;
10029   const auto *LD = cast<OMPLoopDirective>(TD);
10030   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10031                                                          PrePostActionTy &) {
10032     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10033       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10034       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10035       CGF.EmitRuntimeCall(
10036           OMPBuilder.getOrCreateRuntimeFunction(
10037               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10038           Args);
10039     }
10040   };
10041   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10042 }
10043 
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // 'depend' or 'nowait' clauses force the offloading call to be wrapped in an
  // outer task; in that case the captured variables must be re-generated
  // inside the task region before calling the outlined function.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Materialize the captured variables for the target region.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo / MapTypesArray / MapNamesArray are filled in by TargetThenGen
  // below and consumed (by reference) inside ThenGen.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    // Host fallback: call the host version of the outlined target region.
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Set up the offloading arrays (base pointers, pointers, sizes, map types,
  // mappers) from the captures and map clauses, then run ThenGen, either
  // inlined or wrapped in an outer task (for depend/nowait).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk captures, record fields, and captured values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Publish the generated arrays to the captures ThenGen reads.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
10353 
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The (device-id, file-id, line) triple uniquely identifies this target
    // region entry point.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter for this target directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the following kinds can satisfy RequiresDeviceCodegen above;
    // reaching them here indicates a bug.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // For any other OpenMP directive, only scan its associated statement (the
  // raw statement, skipping the captured-region wrappers).
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10503 
10504 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10505   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10506       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10507   if (!DevTy)
10508     return false;
10509   // Do not emit device_type(nohost) functions for the host.
10510   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10511     return true;
10512   // Do not emit device_type(host) functions for the device.
10513   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10514     return true;
10515   return false;
10516 }
10517 
10518 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10519   // If emitting code for the host, we do not process FD here. Instead we do
10520   // the normal code generation.
10521   if (!CGM.getLangOpts().OpenMPIsDevice) {
10522     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10523       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10524                                   CGM.getLangOpts().OpenMPIsDevice))
10525         return true;
10526     return false;
10527   }
10528 
10529   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10530   // Try to detect target regions in the function.
10531   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10532     StringRef Name = CGM.getMangledName(GD);
10533     scanForTargetRegionsFunctions(FD->getBody(), Name);
10534     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10535                                 CGM.getLangOpts().OpenMPIsDevice))
10536       return true;
10537   }
10538 
10539   // Do not to emit function if it is not marked as declare target.
10540   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10541          AlreadyEmittedTargetDecls.count(VD) == 0;
10542 }
10543 
10544 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10545   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10546                               CGM.getLangOpts().OpenMPIsDevice))
10547     return true;
10548 
10549   if (!CGM.getLangOpts().OpenMPIsDevice)
10550     return false;
10551 
10552   // Check if there are Ctors/Dtors in this declaration and look for target
10553   // regions in it. We use the complete variant to produce the kernel name
10554   // mangling.
10555   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10556   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10557     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10558       StringRef ParentName =
10559           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10560       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10561     }
10562     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10563       StringRef ParentName =
10564           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10565       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10566     }
10567   }
10568 
10569   // Do not to emit variable if it is not marked as declare target.
10570   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10571       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10572           cast<VarDecl>(GD.getDecl()));
10573   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10574       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10575        HasRequiresUnifiedSharedMemory)) {
10576     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10577     return true;
10578   }
10579   return false;
10580 }
10581 
// Creates (or reuses) an internal device global for the constant firstprivate
// variable \p VD and registers it as a device global var offload entry. The
// global's name encodes the variable's unique source position so repeated
// requests map to the same entry.
llvm::Constant *
CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
                                                const VarDecl *VD) {
  assert(VD->getType().isConstant(CGM.getContext()) &&
         "Expected constant variable.");
  StringRef VarName;
  llvm::Constant *Addr;
  llvm::GlobalValue::LinkageTypes Linkage;
  QualType Ty = VD->getType();
  SmallString<128> Buffer;
  {
    // Build a unique name from the variable's device id, file id and line;
    // in C++ the mangled type is added to disambiguate same-named variables
    // of different types.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
                             FileID, Line);
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID);
    if (CGM.getLangOpts().CPlusPlus) {
      CGM.getCXXABI().getMangleContext().mangleTypeName(VD->getType(), OS);
      OS << "_";
    }
    OS << VD->getName() << "_l" << Line;
    VarName = OS.str();
  }
  Linkage = llvm::GlobalValue::InternalLinkage;
  Addr =
      getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
                                  getDefaultFirstprivateAddressSpace());
  cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
  CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
  // Keep the global alive through optimizations (llvm.compiler.used); it is
  // referenced through the offload entry table rather than directly.
  CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize,
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
  return Addr;
}
10620 
10621 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10622                                                    llvm::Constant *Addr) {
10623   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10624       !CGM.getLangOpts().OpenMPIsDevice)
10625     return;
10626 
10627   // If we have host/nohost variables, they do not need to be registered.
10628   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10629       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10630   if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
10631     return;
10632 
10633   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10634       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10635   if (!Res) {
10636     if (CGM.getLangOpts().OpenMPIsDevice) {
10637       // Register non-target variables being emitted in device code (debug info
10638       // may cause this).
10639       StringRef VarName = CGM.getMangledName(VD);
10640       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10641     }
10642     return;
10643   }
10644   // Register declare target variables.
10645   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10646   StringRef VarName;
10647   CharUnits VarSize;
10648   llvm::GlobalValue::LinkageTypes Linkage;
10649 
10650   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10651       !HasRequiresUnifiedSharedMemory) {
10652     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10653     VarName = CGM.getMangledName(VD);
10654     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10655       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10656       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10657     } else {
10658       VarSize = CharUnits::Zero();
10659     }
10660     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10661     // Temp solution to prevent optimizations of the internal variables.
10662     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10663       // Do not create a "ref-variable" if the original is not also available
10664       // on the host.
10665       if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10666         return;
10667       std::string RefName = getName({VarName, "ref"});
10668       if (!CGM.GetGlobalValue(RefName)) {
10669         llvm::Constant *AddrRef =
10670             getOrCreateInternalVariable(Addr->getType(), RefName);
10671         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10672         GVAddrRef->setConstant(/*Val=*/true);
10673         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10674         GVAddrRef->setInitializer(Addr);
10675         CGM.addCompilerUsedGlobal(GVAddrRef);
10676       }
10677     }
10678   } else {
10679     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10680             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10681              HasRequiresUnifiedSharedMemory)) &&
10682            "Declare target attribute must link or to with unified memory.");
10683     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10684       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10685     else
10686       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10687 
10688     if (CGM.getLangOpts().OpenMPIsDevice) {
10689       VarName = Addr->getName();
10690       Addr = nullptr;
10691     } else {
10692       VarName = getAddrOfDeclareTargetVar(VD).getName();
10693       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10694     }
10695     VarSize = CGM.getPointerSize();
10696     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10697   }
10698 
10699   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10700       VarName, Addr, VarSize, Flags, Linkage);
10701 }
10702 
10703 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10704   if (isa<FunctionDecl>(GD.getDecl()) ||
10705       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10706     return emitTargetFunctions(GD);
10707 
10708   return emitTargetGlobalVariable(GD);
10709 }
10710 
10711 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10712   for (const VarDecl *VD : DeferredGlobalVariables) {
10713     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10714         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10715     if (!Res)
10716       continue;
10717     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10718         !HasRequiresUnifiedSharedMemory) {
10719       CGM.EmitGlobal(VD);
10720     } else {
10721       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10722               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10723                HasRequiresUnifiedSharedMemory)) &&
10724              "Expected link clause or to clause with unified memory.");
10725       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10726     }
10727   }
10728 }
10729 
10730 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10731     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10732   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10733          " Expected target-based directive.");
10734 }
10735 
10736 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10737   for (const OMPClause *Clause : D->clauselists()) {
10738     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10739       HasRequiresUnifiedSharedMemory = true;
10740     } else if (const auto *AC =
10741                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10742       switch (AC->getAtomicDefaultMemOrderKind()) {
10743       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10744         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10745         break;
10746       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10747         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10748         break;
10749       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10750         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10751         break;
10752       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10753         break;
10754       }
10755     }
10756   }
10757 }
10758 
/// Returns the default atomic ordering for this TU, as set by
/// processRequiresDirective when an atomic_default_mem_order clause is seen.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10762 
10763 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10764                                                        LangAS &AS) {
10765   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10766     return false;
10767   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10768   switch(A->getAllocatorType()) {
10769   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10770   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10771   // Not supported, fallback to the default mem space.
10772   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10773   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10774   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10775   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10776   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10777   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10778   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10779     AS = LangAS::Default;
10780     return true;
10781   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10782     llvm_unreachable("Expected predefined allocator for the variables with the "
10783                      "static storage.");
10784   }
10785   return false;
10786 }
10787 
/// Returns true if an 'omp requires unified_shared_memory' clause has been
/// processed in this TU (see processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10791 
10792 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10793     CodeGenModule &CGM)
10794     : CGM(CGM) {
10795   if (CGM.getLangOpts().OpenMPIsDevice) {
10796     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10797     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10798   }
10799 }
10800 
10801 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10802   if (CGM.getLangOpts().OpenMPIsDevice)
10803     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10804 }
10805 
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // Host compilations, and scopes where marking is disabled (see
  // DisableAutoDeclareTargetRAII), take the early-out path.
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit function if it is marked as declare target as it was already
  // emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // If the module already contains a definition for this mangled name,
      // report it as emitted; a mere declaration is not enough.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // First time this non-declare-target decl is seen: record it and return
  // false; any later call for the same decl returns true.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}
10825 
// Builds the host-side registration function that reports the 'omp requires'
// flags (currently only unified shared memory) to the runtime via
// __tgt_register_requires. Returns nullptr when no registration is needed.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Emit the call __tgt_register_requires(flags) in the new function body.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
10867 
10868 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10869                                     const OMPExecutableDirective &D,
10870                                     SourceLocation Loc,
10871                                     llvm::Function *OutlinedFn,
10872                                     ArrayRef<llvm::Value *> CapturedVars) {
10873   if (!CGF.HaveInsertPoint())
10874     return;
10875 
10876   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10877   CodeGenFunction::RunCleanupsScope Scope(CGF);
10878 
10879   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10880   llvm::Value *Args[] = {
10881       RTLoc,
10882       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10883       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10884   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10885   RealArgs.append(std::begin(Args), std::end(Args));
10886   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10887 
10888   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10889       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10890   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10891 }
10892 
10893 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10894                                          const Expr *NumTeams,
10895                                          const Expr *ThreadLimit,
10896                                          SourceLocation Loc) {
10897   if (!CGF.HaveInsertPoint())
10898     return;
10899 
10900   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10901 
10902   llvm::Value *NumTeamsVal =
10903       NumTeams
10904           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10905                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10906           : CGF.Builder.getInt32(0);
10907 
10908   llvm::Value *ThreadLimitVal =
10909       ThreadLimit
10910           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10911                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10912           : CGF.Builder.getInt32(0);
10913 
10914   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10915   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10916                                      ThreadLimitVal};
10917   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10918                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10919                       PushNumTeamsArgs);
10920 }
10921 
// Emits an 'omp target data' region: a __tgt_target_data_begin_mapper /
// __tgt_target_data_end_mapper pair around the region body, honoring the
// optional if-clause (conditional emission) and device-clause (device id).
// When device pointers are privatized the body is emitted twice: privatized
// inside the mapped environment and unprivatized in the else branch.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick via the "undefined" device id.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
    //
    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Emit the begin call (guarded by the if-clause when present).
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Emit the matching end call.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
11075 
// Emits the single runtime call for a standalone data directive ('target
// enter data', 'target exit data', 'target update'), selecting the mapper
// entry point (and its _nowait variant) by directive kind. When depend or
// nowait clauses are present the call is wrapped in a target task.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick via the "undefined" device id.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All other directive kinds can never reach this function (checked by the
    // assert above); they are enumerated so the switch stays fully covered.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});
    // Publish the array addresses to the captured InputInfo/MapTypesArray/
    // MapNamesArray so ThenGen (emitted later, possibly inside a task) can
    // reference them.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Guard by the if-clause if present; a false condition emits nothing.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11254 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; defaults to Vector (varying).
    ParamKindTy Kind = Vector;
    /// For Linear: the (constant) stride of the parameter.
    /// For LinearWithVarStride: the position of the parameter that holds
    /// the stride.
    llvm::APSInt StrideOrArg;
    /// Alignment from the 'aligned' clause; zero when not specified.
    llvm::APSInt Alignment;
  };
} // namespace
11265 
11266 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11267                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11268   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11269   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11270   // of that clause. The VLEN value must be power of 2.
11271   // In other case the notion of the function`s "characteristic data type" (CDT)
11272   // is used to compute the vector length.
11273   // CDT is defined in the following order:
11274   //   a) For non-void function, the CDT is the return type.
11275   //   b) If the function has any non-uniform, non-linear parameters, then the
11276   //   CDT is the type of the first such parameter.
11277   //   c) If the CDT determined by a) or b) above is struct, union, or class
11278   //   type which is pass-by-value (except for the type that maps to the
11279   //   built-in complex data type), the characteristic data type is int.
11280   //   d) If none of the above three cases is applicable, the CDT is int.
11281   // The VLEN is then determined based on the CDT and the size of vector
11282   // register of that ISA for which current vector version is generated. The
11283   // VLEN is computed using the formula below:
11284   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11285   // where vector register size specified in section 3.2.1 Registers and the
11286   // Stack Frame of original AMD64 ABI document.
11287   QualType RetType = FD->getReturnType();
11288   if (RetType.isNull())
11289     return 0;
11290   ASTContext &C = FD->getASTContext();
11291   QualType CDT;
11292   if (!RetType.isNull() && !RetType->isVoidType()) {
11293     CDT = RetType;
11294   } else {
11295     unsigned Offset = 0;
11296     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11297       if (ParamAttrs[Offset].Kind == Vector)
11298         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11299       ++Offset;
11300     }
11301     if (CDT.isNull()) {
11302       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11303         if (ParamAttrs[I + Offset].Kind == Vector) {
11304           CDT = FD->getParamDecl(I)->getType();
11305           break;
11306         }
11307       }
11308     }
11309   }
11310   if (CDT.isNull())
11311     CDT = C.IntTy;
11312   CDT = CDT->getCanonicalTypeUnqualified();
11313   if (CDT->isRecordType() || CDT->isUnionType())
11314     CDT = C.IntTy;
11315   return C.getTypeSize(CDT);
11316 }
11317 
11318 static void
11319 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11320                            const llvm::APSInt &VLENVal,
11321                            ArrayRef<ParamAttrTy> ParamAttrs,
11322                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11323   struct ISADataTy {
11324     char ISA;
11325     unsigned VecRegSize;
11326   };
11327   ISADataTy ISAData[] = {
11328       {
11329           'b', 128
11330       }, // SSE
11331       {
11332           'c', 256
11333       }, // AVX
11334       {
11335           'd', 256
11336       }, // AVX2
11337       {
11338           'e', 512
11339       }, // AVX512
11340   };
11341   llvm::SmallVector<char, 2> Masked;
11342   switch (State) {
11343   case OMPDeclareSimdDeclAttr::BS_Undefined:
11344     Masked.push_back('N');
11345     Masked.push_back('M');
11346     break;
11347   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11348     Masked.push_back('N');
11349     break;
11350   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11351     Masked.push_back('M');
11352     break;
11353   }
11354   for (char Mask : Masked) {
11355     for (const ISADataTy &Data : ISAData) {
11356       SmallString<256> Buffer;
11357       llvm::raw_svector_ostream Out(Buffer);
11358       Out << "_ZGV" << Data.ISA << Mask;
11359       if (!VLENVal) {
11360         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11361         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11362         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11363       } else {
11364         Out << VLENVal;
11365       }
11366       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11367         switch (ParamAttr.Kind){
11368         case LinearWithVarStride:
11369           Out << 's' << ParamAttr.StrideOrArg;
11370           break;
11371         case Linear:
11372           Out << 'l';
11373           if (ParamAttr.StrideOrArg != 1)
11374             Out << ParamAttr.StrideOrArg;
11375           break;
11376         case Uniform:
11377           Out << 'u';
11378           break;
11379         case Vector:
11380           Out << 'v';
11381           break;
11382         }
11383         if (!!ParamAttr.Alignment)
11384           Out << 'a' << ParamAttr.Alignment;
11385       }
11386       Out << '_' << Fn->getName();
11387       Fn->addFnAttr(Out.str());
11388     }
11389   }
11390 }
11391 
// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11397 
11398 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11399 ///
11400 /// TODO: Need to implement the behavior for reference marked with a
11401 /// var or no linear modifiers (1.b in the section). For this, we
11402 /// need to extend ParamKindTy to support the linear modifiers.
11403 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11404   QT = QT.getCanonicalType();
11405 
11406   if (QT->isVoidType())
11407     return false;
11408 
11409   if (Kind == ParamKindTy::Uniform)
11410     return false;
11411 
11412   if (Kind == ParamKindTy::Linear)
11413     return false;
11414 
11415   // TODO: Handle linear references with modifiers
11416 
11417   if (Kind == ParamKindTy::LinearWithVarStride)
11418     return false;
11419 
11420   return true;
11421 }
11422 
11423 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11424 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11425   QT = QT.getCanonicalType();
11426   unsigned Size = C.getTypeSize(QT);
11427 
11428   // Only scalars and complex within 16 bytes wide set PVB to true.
11429   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11430     return false;
11431 
11432   if (QT->isFloatingType())
11433     return true;
11434 
11435   if (QT->isIntegerType())
11436     return true;
11437 
11438   if (QT->isPointerType())
11439     return true;
11440 
11441   // TODO: Add support for complex types (section 3.1.2, item 2).
11442 
11443   return false;
11444 }
11445 
11446 /// Computes the lane size (LS) of a return type or of an input parameter,
11447 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11448 /// TODO: Add support for references, section 3.2.1, item 1.
11449 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11450   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11451     QualType PTy = QT.getCanonicalType()->getPointeeType();
11452     if (getAArch64PBV(PTy, C))
11453       return C.getTypeSize(PTy);
11454   }
11455   if (getAArch64PBV(QT, C))
11456     return C.getTypeSize(QT);
11457 
11458   return C.getTypeSize(C.getUIntPtrType());
11459 }
11460 
11461 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11462 // signature of the scalar function, as defined in 3.2.2 of the
11463 // AAVFABI.
11464 static std::tuple<unsigned, unsigned, bool>
11465 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11466   QualType RetType = FD->getReturnType().getCanonicalType();
11467 
11468   ASTContext &C = FD->getASTContext();
11469 
11470   bool OutputBecomesInput = false;
11471 
11472   llvm::SmallVector<unsigned, 8> Sizes;
11473   if (!RetType->isVoidType()) {
11474     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11475     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11476       OutputBecomesInput = true;
11477   }
11478   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11479     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11480     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11481   }
11482 
11483   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11484   // The LS of a function parameter / return value can only be a power
11485   // of 2, starting from 8 bits, up to 128.
11486   assert(std::all_of(Sizes.begin(), Sizes.end(),
11487                      [](unsigned Size) {
11488                        return Size == 8 || Size == 16 || Size == 32 ||
11489                               Size == 64 || Size == 128;
11490                      }) &&
11491          "Invalid size");
11492 
11493   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11494                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11495                          OutputBecomesInput);
11496 }
11497 
11498 /// Mangle the parameter part of the vector function name according to
11499 /// their OpenMP classification. The mangling function is defined in
11500 /// section 3.5 of the AAVFABI.
11501 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11502   SmallString<256> Buffer;
11503   llvm::raw_svector_ostream Out(Buffer);
11504   for (const auto &ParamAttr : ParamAttrs) {
11505     switch (ParamAttr.Kind) {
11506     case LinearWithVarStride:
11507       Out << "ls" << ParamAttr.StrideOrArg;
11508       break;
11509     case Linear:
11510       Out << 'l';
11511       // Don't print the step value if it is not present or if it is
11512       // equal to 1.
11513       if (ParamAttr.StrideOrArg != 1)
11514         Out << ParamAttr.StrideOrArg;
11515       break;
11516     case Uniform:
11517       Out << 'u';
11518       break;
11519     case Vector:
11520       Out << 'v';
11521       break;
11522     }
11523 
11524     if (!!ParamAttr.Alignment)
11525       Out << 'a' << ParamAttr.Alignment;
11526   }
11527 
11528   return std::string(Out.str());
11529 }
11530 
// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  // Assemble "<Prefix><ISA><LMask><VLEN>[v]<ParSeq>_<MangledName>" and
  // attach it to the function as an attribute.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  // An extra 'v' is mangled in when the return value also acts as an input.
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}
11547 
11548 // Helper function to generate the Advanced SIMD names depending on
11549 // the value of the NDS when simdlen is not present.
11550 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11551                                       StringRef Prefix, char ISA,
11552                                       StringRef ParSeq, StringRef MangledName,
11553                                       bool OutputBecomesInput,
11554                                       llvm::Function *Fn) {
11555   switch (NDS) {
11556   case 8:
11557     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11558                          OutputBecomesInput, Fn);
11559     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11560                          OutputBecomesInput, Fn);
11561     break;
11562   case 16:
11563     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11564                          OutputBecomesInput, Fn);
11565     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11566                          OutputBecomesInput, Fn);
11567     break;
11568   case 32:
11569     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11570                          OutputBecomesInput, Fn);
11571     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11572                          OutputBecomesInput, Fn);
11573     break;
11574   case 64:
11575   case 128:
11576     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11577                          OutputBecomesInput, Fn);
11578     break;
11579   default:
11580     llvm_unreachable("Scalar type is too wide.");
11581   }
11582 }
11583 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data); // Narrowest data size.
  const unsigned WDS = std::get<1>(Data); // Widest data size.
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        // No branch-state clause: emit both unmasked ("N") and masked
        // ("M") variants.
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: "x" marks the scalable vector length.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11692 
11693 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11694                                               llvm::Function *Fn) {
11695   ASTContext &C = CGM.getContext();
11696   FD = FD->getMostRecentDecl();
11697   // Map params to their positions in function decl.
11698   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11699   if (isa<CXXMethodDecl>(FD))
11700     ParamPositions.try_emplace(FD, 0);
11701   unsigned ParamPos = ParamPositions.size();
11702   for (const ParmVarDecl *P : FD->parameters()) {
11703     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11704     ++ParamPos;
11705   }
11706   while (FD) {
11707     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11708       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11709       // Mark uniform parameters.
11710       for (const Expr *E : Attr->uniforms()) {
11711         E = E->IgnoreParenImpCasts();
11712         unsigned Pos;
11713         if (isa<CXXThisExpr>(E)) {
11714           Pos = ParamPositions[FD];
11715         } else {
11716           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11717                                 ->getCanonicalDecl();
11718           Pos = ParamPositions[PVD];
11719         }
11720         ParamAttrs[Pos].Kind = Uniform;
11721       }
11722       // Get alignment info.
11723       auto NI = Attr->alignments_begin();
11724       for (const Expr *E : Attr->aligneds()) {
11725         E = E->IgnoreParenImpCasts();
11726         unsigned Pos;
11727         QualType ParmTy;
11728         if (isa<CXXThisExpr>(E)) {
11729           Pos = ParamPositions[FD];
11730           ParmTy = E->getType();
11731         } else {
11732           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11733                                 ->getCanonicalDecl();
11734           Pos = ParamPositions[PVD];
11735           ParmTy = PVD->getType();
11736         }
11737         ParamAttrs[Pos].Alignment =
11738             (*NI)
11739                 ? (*NI)->EvaluateKnownConstInt(C)
11740                 : llvm::APSInt::getUnsigned(
11741                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11742                           .getQuantity());
11743         ++NI;
11744       }
11745       // Mark linear parameters.
11746       auto SI = Attr->steps_begin();
11747       auto MI = Attr->modifiers_begin();
11748       for (const Expr *E : Attr->linears()) {
11749         E = E->IgnoreParenImpCasts();
11750         unsigned Pos;
11751         // Rescaling factor needed to compute the linear parameter
11752         // value in the mangled name.
11753         unsigned PtrRescalingFactor = 1;
11754         if (isa<CXXThisExpr>(E)) {
11755           Pos = ParamPositions[FD];
11756         } else {
11757           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11758                                 ->getCanonicalDecl();
11759           Pos = ParamPositions[PVD];
11760           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11761             PtrRescalingFactor = CGM.getContext()
11762                                      .getTypeSizeInChars(P->getPointeeType())
11763                                      .getQuantity();
11764         }
11765         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11766         ParamAttr.Kind = Linear;
11767         // Assuming a stride of 1, for `linear` without modifiers.
11768         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11769         if (*SI) {
11770           Expr::EvalResult Result;
11771           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11772             if (const auto *DRE =
11773                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11774               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11775                 ParamAttr.Kind = LinearWithVarStride;
11776                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11777                     ParamPositions[StridePVD->getCanonicalDecl()]);
11778               }
11779             }
11780           } else {
11781             ParamAttr.StrideOrArg = Result.Val.getInt();
11782           }
11783         }
11784         // If we are using a linear clause on a pointer, we need to
11785         // rescale the value of linear_step with the byte size of the
11786         // pointee type.
11787         if (Linear == ParamAttr.Kind)
11788           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11789         ++SI;
11790         ++MI;
11791       }
11792       llvm::APSInt VLENVal;
11793       SourceLocation ExprLoc;
11794       const Expr *VLENExpr = Attr->getSimdlen();
11795       if (VLENExpr) {
11796         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11797         ExprLoc = VLENExpr->getExprLoc();
11798       }
11799       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11800       if (CGM.getTriple().isX86()) {
11801         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11802       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11803         unsigned VLEN = VLENVal.getExtValue();
11804         StringRef MangledName = Fn->getName();
11805         if (CGM.getTarget().hasFeature("sve"))
11806           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11807                                          MangledName, 's', 128, Fn, ExprLoc);
11808         if (CGM.getTarget().hasFeature("neon"))
11809           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11810                                          MangledName, 'n', 128, Fn, ExprLoc);
11811       }
11812     }
11813     FD = FD->getPreviousDecl();
11814   }
11815 }
11816 
namespace {
/// Cleanup action for doacross support.
///
/// Pushed on the EH stack by emitDoacrossInit; on scope exit (normal or EH
/// path) it emits the captured runtime finalization call.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  /// Number of arguments of the finalization call.
  static const int DoacrossFinArgs = 2;

private:
  /// Runtime function to call during cleanup.
  llvm::FunctionCallee RTLFn;
  /// Captured arguments for the finalization call.
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // Nothing to emit when the insertion point has been cleared
    // (unreachable code).
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace
11841 
/// Emit initialization for doacross loops: builds the kmp_dim array from
/// \p NumIterations, calls __kmpc_doacross_init, and schedules the matching
/// __kmpc_doacross_fini via a cleanup on the EH stack.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    // kmp_dim was already built on a previous call; reuse its declaration.
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim entry per loop dimension.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data; the lower bound stays 0 from the null init.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule __kmpc_doacross_fini(loc, gtid) for scope exit on both normal
  // and EH paths.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
11912 
11913 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11914                                           const OMPDependClause *C) {
11915   QualType Int64Ty =
11916       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11917   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11918   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11919       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11920   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11921   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11922     const Expr *CounterVal = C->getLoopData(I);
11923     assert(CounterVal);
11924     llvm::Value *CntVal = CGF.EmitScalarConversion(
11925         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11926         CounterVal->getExprLoc());
11927     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11928                           /*Volatile=*/false, Int64Ty);
11929   }
11930   llvm::Value *Args[] = {
11931       emitUpdateLocation(CGF, C->getBeginLoc()),
11932       getThreadID(CGF, C->getBeginLoc()),
11933       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11934   llvm::FunctionCallee RTLFn;
11935   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11936     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11937                                                   OMPRTL___kmpc_doacross_post);
11938   } else {
11939     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11940     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11941                                                   OMPRTL___kmpc_doacross_wait);
11942   }
11943   CGF.EmitRuntimeCall(RTLFn, Args);
11944 }
11945 
11946 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11947                                llvm::FunctionCallee Callee,
11948                                ArrayRef<llvm::Value *> Args) const {
11949   assert(Loc.isValid() && "Outlined function call location must be valid.");
11950   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11951 
11952   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11953     if (Fn->doesNotThrow()) {
11954       CGF.EmitNounwindRuntimeCall(Fn, Args);
11955       return;
11956     }
11957   }
11958   CGF.EmitRuntimeCall(Callee, Args);
11959 }
11960 
/// Emit a call to an outlined OpenMP function; forwards to emitCall, which
/// attaches an artificial debug location from \p Loc.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11966 
11967 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11968   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11969     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11970       HasEmittedDeclareTargetRegion = true;
11971 }
11972 
// Maps a "native" parameter of an outlined function to the address of the
// corresponding "target" parameter. The host default is the identity mapping
// (native and target parameters coincide); presumably device runtimes
// override this to translate address spaces — confirm against subclasses.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11978 
// Returns the address to use for the local variable VD, or an invalid
// address if the default local emission should be used. Two cases are
// handled here:
//  * Locals of untied tasks: their storage is recorded in
//    UntiedLocalVarsStack (per current function), so the saved address is
//    returned instead of fresh stack storage.
//  * Locals with an 'omp allocate' attribute: storage is obtained from the
//    OpenMP runtime via __kmpc_alloc and released with __kmpc_free through a
//    pushed EH cleanup.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  // Look up addresses recorded for this variable if the current function is
  // an untied task: first = address visible to the task body, second = real
  // storage address.
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Not allocatable through the runtime: use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    // Compute the allocation size, rounded up to the declared alignment.
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA: the size is a runtime value; round it up with IR arithmetic.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Constant-size type: round up at compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // void *addr = __kmpc_alloc(tid, size, allocator);
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // In an untied task, publish the allocated pointer through the recorded
    // task-visible slot.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      unsigned LocEncoding; // SourceLocation stored as its raw encoding.
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      // Emits __kmpc_free(tid, addr, allocator) on scope exit.
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // Prefer the real storage address recorded for an untied task.
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    // Untied tasks may resume in a different part; emit the switch that
    // re-establishes the correct continuation point.
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12081 
12082 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12083                                              const VarDecl *VD) const {
12084   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12085   if (It == FunctionToUntiedTaskStackMap.end())
12086     return false;
12087   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12088 }
12089 
12090 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12091     CodeGenModule &CGM, const OMPLoopDirective &S)
12092     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12093   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12094   if (!NeedToPush)
12095     return;
12096   NontemporalDeclsSet &DS =
12097       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12098   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12099     for (const Stmt *Ref : C->private_refs()) {
12100       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12101       const ValueDecl *VD;
12102       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12103         VD = DRE->getDecl();
12104       } else {
12105         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12106         assert((ME->isImplicitCXXThis() ||
12107                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12108                "Expected member of current class.");
12109         VD = ME->getMemberDecl();
12110       }
12111       DS.insert(VD);
12112     }
12113   }
12114 }
12115 
12116 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12117   if (!NeedToPush)
12118     return;
12119   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12120 }
12121 
12122 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12123     CodeGenFunction &CGF,
12124     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12125                           std::pair<Address, Address>> &LocalVars)
12126     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12127   if (!NeedToPush)
12128     return;
12129   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12130       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12131   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12132 }
12133 
12134 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12135   if (!NeedToPush)
12136     return;
12137   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12138 }
12139 
12140 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12141   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12142 
12143   return llvm::any_of(
12144       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12145       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
12146 }
12147 
12148 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12149     const OMPExecutableDirective &S,
12150     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12151     const {
12152   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12153   // Vars in target/task regions must be excluded completely.
12154   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12155       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12156     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12157     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12158     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12159     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12160       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12161         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12162     }
12163   }
12164   // Exclude vars in private clauses.
12165   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12166     for (const Expr *Ref : C->varlists()) {
12167       if (!Ref->getType()->isScalarType())
12168         continue;
12169       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12170       if (!DRE)
12171         continue;
12172       NeedToCheckForLPCs.insert(DRE->getDecl());
12173     }
12174   }
12175   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12176     for (const Expr *Ref : C->varlists()) {
12177       if (!Ref->getType()->isScalarType())
12178         continue;
12179       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12180       if (!DRE)
12181         continue;
12182       NeedToCheckForLPCs.insert(DRE->getDecl());
12183     }
12184   }
12185   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12186     for (const Expr *Ref : C->varlists()) {
12187       if (!Ref->getType()->isScalarType())
12188         continue;
12189       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12190       if (!DRE)
12191         continue;
12192       NeedToCheckForLPCs.insert(DRE->getDecl());
12193     }
12194   }
12195   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12196     for (const Expr *Ref : C->varlists()) {
12197       if (!Ref->getType()->isScalarType())
12198         continue;
12199       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12200       if (!DRE)
12201         continue;
12202       NeedToCheckForLPCs.insert(DRE->getDecl());
12203     }
12204   }
12205   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12206     for (const Expr *Ref : C->varlists()) {
12207       if (!Ref->getType()->isScalarType())
12208         continue;
12209       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12210       if (!DRE)
12211         continue;
12212       NeedToCheckForLPCs.insert(DRE->getDecl());
12213     }
12214   }
12215   for (const Decl *VD : NeedToCheckForLPCs) {
12216     for (const LastprivateConditionalData &Data :
12217          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12218       if (Data.DeclToUniqueName.count(VD) > 0) {
12219         if (!Data.Disabled)
12220           NeedToAddForLPCsAsDisabled.insert(VD);
12221         break;
12222       }
12223     }
12224   }
12225 }
12226 
// Push constructor: if S (OpenMP >= 5.0) has any lastprivate(conditional:)
// clause, pushes a new entry on the lastprivate-conditional stack mapping
// each listed variable to a unique global name, and records the loop
// iteration variable lvalue and current function.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      // Decide push vs no-push in the init list so Action is set exactly
      // once: push only if any lastprivate clause is 'conditional'.
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Map each conditional-lastprivate decl to a unique "pl_cond" name used
    // for the global that holds the last value.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12258 
12259 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12260     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12261     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12262   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12263   if (CGM.getLangOpts().OpenMP < 50)
12264     return;
12265   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12266   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12267   if (!NeedToAddForLPCsAsDisabled.empty()) {
12268     Action = ActionToDo::DisableLastprivateConditional;
12269     LastprivateConditionalData &Data =
12270         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12271     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12272       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12273     Data.Fn = CGF.CurFn;
12274     Data.Disabled = true;
12275   }
12276 }
12277 
// Named factory for the disable constructor: returns an RAII object that
// suppresses lastprivate-conditional analysis for the region of S.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12283 
12284 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12285   if (CGM.getLangOpts().OpenMP < 50)
12286     return;
12287   if (Action == ActionToDo::DisableLastprivateConditional) {
12288     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12289            "Expected list of disabled private vars.");
12290     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12291   }
12292   if (Action == ActionToDo::PushAsLastprivateConditional) {
12293     assert(
12294         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12295         "Expected list of lastprivate conditional vars.");
12296     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12297   }
12298 }
12299 
12300 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12301                                                         const VarDecl *VD) {
12302   ASTContext &C = CGM.getContext();
12303   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12304   if (I == LastprivateConditionalToTypes.end())
12305     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12306   QualType NewType;
12307   const FieldDecl *VDField;
12308   const FieldDecl *FiredField;
12309   LValue BaseLVal;
12310   auto VI = I->getSecond().find(VD);
12311   if (VI == I->getSecond().end()) {
12312     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12313     RD->startDefinition();
12314     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12315     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12316     RD->completeDefinition();
12317     NewType = C.getRecordType(RD);
12318     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12319     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12320     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12321   } else {
12322     NewType = std::get<0>(VI->getSecond());
12323     VDField = std::get<1>(VI->getSecond());
12324     FiredField = std::get<2>(VI->getSecond());
12325     BaseLVal = std::get<3>(VI->getSecond());
12326   }
12327   LValue FiredLVal =
12328       CGF.EmitLValueForField(BaseLVal, FiredField);
12329   CGF.EmitStoreOfScalar(
12330       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12331       FiredLVal);
12332   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12333 }
12334 
12335 namespace {
12336 /// Checks if the lastprivate conditional variable is referenced in LHS.
12337 class LastprivateConditionalRefChecker final
12338     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12339   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12340   const Expr *FoundE = nullptr;
12341   const Decl *FoundD = nullptr;
12342   StringRef UniqueDeclName;
12343   LValue IVLVal;
12344   llvm::Function *FoundFn = nullptr;
12345   SourceLocation Loc;
12346 
12347 public:
12348   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12349     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12350          llvm::reverse(LPM)) {
12351       auto It = D.DeclToUniqueName.find(E->getDecl());
12352       if (It == D.DeclToUniqueName.end())
12353         continue;
12354       if (D.Disabled)
12355         return false;
12356       FoundE = E;
12357       FoundD = E->getDecl()->getCanonicalDecl();
12358       UniqueDeclName = It->second;
12359       IVLVal = D.IVLVal;
12360       FoundFn = D.Fn;
12361       break;
12362     }
12363     return FoundE == E;
12364   }
12365   bool VisitMemberExpr(const MemberExpr *E) {
12366     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12367       return false;
12368     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12369          llvm::reverse(LPM)) {
12370       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12371       if (It == D.DeclToUniqueName.end())
12372         continue;
12373       if (D.Disabled)
12374         return false;
12375       FoundE = E;
12376       FoundD = E->getMemberDecl()->getCanonicalDecl();
12377       UniqueDeclName = It->second;
12378       IVLVal = D.IVLVal;
12379       FoundFn = D.Fn;
12380       break;
12381     }
12382     return FoundE == E;
12383   }
12384   bool VisitStmt(const Stmt *S) {
12385     for (const Stmt *Child : S->children()) {
12386       if (!Child)
12387         continue;
12388       if (const auto *E = dyn_cast<Expr>(Child))
12389         if (!E->isGLValue())
12390           continue;
12391       if (Visit(Child))
12392         return true;
12393     }
12394     return false;
12395   }
12396   explicit LastprivateConditionalRefChecker(
12397       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12398       : LPM(LPM) {}
12399   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12400   getFoundData() const {
12401     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12402   }
12403 };
12404 } // namespace
12405 
// Emits the conditional update of the global "last value" copy for a
// lastprivate conditional variable:
//   #pragma omp critical(<UniqueDeclName>)
//   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
// where last_iv/last_a are lazily-created internal globals and iv is the
// loop iteration variable (IVLVal). In simd-only mode the critical region is
// skipped since no parallel region can exist.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12492 
// Checks whether LHS (the target of an assignment) refers to a lastprivate
// conditional variable and, if so, emits the bookkeeping for the store:
// either the conditional update of the global copy (same function) or
// setting the Fired flag of the enclosing region's private struct (inner
// parallel region).
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    // Recover the enclosing struct from the address of its value field (the
    // value is the first field, so the addresses coincide).
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // Atomic volatile store: other threads of the team may set the flag
    // concurrently; the outer region reads it after the parallel region.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
12535 
12536 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12537     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12538     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12539   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12540     return;
12541   auto Range = llvm::reverse(LastprivateConditionalStack);
12542   auto It = llvm::find_if(
12543       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12544   if (It == Range.end() || It->Fn != CGF.CurFn)
12545     return;
12546   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12547   assert(LPCI != LastprivateConditionalToTypes.end() &&
12548          "Lastprivates must be registered already.");
12549   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12550   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12551   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12552   for (const auto &Pair : It->DeclToUniqueName) {
12553     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12554     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12555       continue;
12556     auto I = LPCI->getSecond().find(Pair.first);
12557     assert(I != LPCI->getSecond().end() &&
12558            "Lastprivate must be rehistered already.");
12559     // bool Cmp = priv_a.Fired != 0;
12560     LValue BaseLVal = std::get<3>(I->getSecond());
12561     LValue FiredLVal =
12562         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12563     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12564     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12565     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12566     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12567     // if (Cmp) {
12568     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12569     CGF.EmitBlock(ThenBB);
12570     Address Addr = CGF.GetAddrOfLocalVar(VD);
12571     LValue LVal;
12572     if (VD->getType()->isReferenceType())
12573       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12574                                            AlignmentSource::Decl);
12575     else
12576       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12577                                 AlignmentSource::Decl);
12578     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12579                                      D.getBeginLoc());
12580     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12581     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12582     // }
12583   }
12584 }
12585 
12586 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12587     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12588     SourceLocation Loc) {
12589   if (CGF.getLangOpts().OpenMP < 50)
12590     return;
12591   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12592   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12593          "Unknown lastprivate conditional variable.");
12594   StringRef UniqueName = It->second;
12595   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12596   // The variable was not updated in the region - exit.
12597   if (!GV)
12598     return;
12599   LValue LPLVal = CGF.MakeAddrLValue(
12600       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12601   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12602   CGF.EmitStoreOfScalar(Res, PrivLVal);
12603 }
12604 
// SIMD-only runtime stub: non-simd OpenMP constructs are not emitted.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12610 
// SIMD-only runtime stub: 'teams' outlining is not emitted.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12616 
// SIMD-only runtime stub: task outlining is not emitted.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12624 
// SIMD-only runtime stub: 'parallel' calls are not emitted.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12632 
// SIMD-only runtime stub: 'critical' regions are not emitted.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12639 
// SIMD-only runtime stub: 'master' regions are not emitted.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12645 
// SIMD-only runtime stub: 'masked' regions are not emitted.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12652 
// SIMD-only runtime stub: 'taskyield' calls are not emitted.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12657 
// 'taskgroup' regions are unsupported in SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12663 
// 'single' regions (including copyprivate handling) are unsupported in
// SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12671 
// 'ordered' regions are unsupported in SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12678 
// Barrier emission is unsupported in SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12686 
// Dynamic (dispatch-based) worksharing-loop initialization is unsupported in
// SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12693 
// Static worksharing-loop initialization is unsupported in SIMD-only OpenMP
// mode.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12699 
// 'distribute' static initialization is unsupported in SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12705 
// Ordered-iteration finalization for worksharing loops is unsupported in
// SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12712 
// Static worksharing-loop finalization is unsupported in SIMD-only OpenMP
// mode.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12718 
// Fetching the next chunk of a dynamically scheduled loop is unsupported in
// SIMD-only OpenMP mode.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12726 
// The 'num_threads' clause is unsupported in SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12732 
// The 'proc_bind' clause is unsupported in SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12738 
// Threadprivate variable access is unsupported in SIMD-only OpenMP mode.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12745 
// Threadprivate variable definitions are unsupported in SIMD-only OpenMP
// mode.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12751 
// Artificial (compiler-generated) threadprivate storage is unsupported in
// SIMD-only OpenMP mode.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12756 
// 'flush' is unsupported in SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12763 
// 'task' directive codegen is unsupported in SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12772 
// 'taskloop' directive codegen is unsupported in SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12779 
// Reductions in SIMD-only mode can only be "simple" (no runtime support
// needed); the assert enforces that, then the base-class implementation is
// reused to emit the reduction code.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  // Delegate to the generic runtime, which handles the simple-reduction path.
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12788 
// Task reduction initialization is unsupported in SIMD-only OpenMP mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12794 
// Task reduction finalization is unsupported in SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12800 
// Task reduction fixups are unsupported in SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12807 
// Task reduction item lookup is unsupported in SIMD-only OpenMP mode.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12814 
// 'taskwait' is unsupported in SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12819 
// 'cancellation point' is unsupported in SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12825 
// 'cancel' is unsupported in SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12831 
// Target-region outlining is unsupported in SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12838 
// 'target' directive invocation is unsupported in SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12848 
// Offload-function registration is unsupported in SIMD-only OpenMP mode.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12852 
// Offload global-variable registration is unsupported in SIMD-only OpenMP
// mode.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12856 
// Unlike the other target hooks this one does not trap: it always returns
// false, presumably signaling that the global was not handled here so default
// codegen proceeds — confirm against the base-class contract in
// CGOpenMPRuntime.h.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
12860 
// 'teams' directive codegen is unsupported in SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12868 
// The 'num_teams'/'thread_limit' clauses are unsupported in SIMD-only OpenMP
// mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12875 
// 'target data' region codegen is unsupported in SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12881 
// Stand-alone target data directives ('target enter/exit data', 'target
// update') are unsupported in SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12887 
// Doacross-loop initialization is unsupported in SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12893 
// Doacross 'ordered depend' codegen is unsupported in SIMD-only OpenMP mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12898 
// Outlined-function parameter translation is unsupported in SIMD-only OpenMP
// mode.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12904 
// Mapping a translated parameter back to its native address is unsupported in
// SIMD-only OpenMP mode.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12911