1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/APValue.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/StmtOpenMP.h"
23 #include "clang/AST/StmtVisitor.h"
24 #include "clang/Basic/BitmaskEnum.h"
25 #include "clang/Basic/FileManager.h"
26 #include "clang/Basic/OpenMPKinds.h"
27 #include "clang/Basic/SourceManager.h"
28 #include "clang/CodeGen/ConstantInitBuilder.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/SetOperations.h"
31 #include "llvm/ADT/StringExtras.h"
32 #include "llvm/Bitcode/BitcodeReader.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
48 /// Base class for handling code generation inside OpenMP regions.
49 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
50 public:
51   /// Kinds of OpenMP regions used in codegen.
52   enum CGOpenMPRegionKind {
53     /// Region with outlined function for standalone 'parallel'
54     /// directive.
55     ParallelOutlinedRegion,
56     /// Region with outlined function for standalone 'task' directive.
57     TaskOutlinedRegion,
58     /// Region for constructs that do not require function outlining,
59     /// like 'for', 'sections', 'atomic' etc. directives.
60     InlinedRegion,
61     /// Region with outlined function for standalone 'target' directive.
62     TargetRegion,
63   };
64 
65   CGOpenMPRegionInfo(const CapturedStmt &CS,
66                      const CGOpenMPRegionKind RegionKind,
67                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
68                      bool HasCancel)
69       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
70         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
71 
72   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
73                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
74                      bool HasCancel)
75       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
76         Kind(Kind), HasCancel(HasCancel) {}
77 
78   /// Get a variable or parameter for storing global thread id
79   /// inside OpenMP construct.
80   virtual const VarDecl *getThreadIDVariable() const = 0;
81 
82   /// Emit the captured statement body.
83   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
84 
85   /// Get an LValue for the current ThreadID variable.
86   /// \return LValue for thread id variable. This LValue always has type int32*.
87   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
88 
89   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
90 
91   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
92 
93   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
94 
95   bool hasCancel() const { return HasCancel; }
96 
97   static bool classof(const CGCapturedStmtInfo *Info) {
98     return Info->getKind() == CR_OpenMP;
99   }
100 
101   ~CGOpenMPRegionInfo() override = default;
102 
103 protected:
104   CGOpenMPRegionKind RegionKind;
105   RegionCodeGenTy CodeGen;
106   OpenMPDirectiveKind Kind;
107   bool HasCancel;
108 };
109 
110 /// API for captured statement code generation in OpenMP constructs.
111 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
112 public:
113   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
114                              const RegionCodeGenTy &CodeGen,
115                              OpenMPDirectiveKind Kind, bool HasCancel,
116                              StringRef HelperName)
117       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
118                            HasCancel),
119         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
120     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
121   }
122 
123   /// Get a variable or parameter for storing global thread id
124   /// inside OpenMP construct.
125   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
126 
127   /// Get the name of the capture helper.
128   StringRef getHelperName() const override { return HelperName; }
129 
130   static bool classof(const CGCapturedStmtInfo *Info) {
131     return CGOpenMPRegionInfo::classof(Info) &&
132            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
133                ParallelOutlinedRegion;
134   }
135 
136 private:
137   /// A variable or parameter storing global thread id for OpenMP
138   /// constructs.
139   const VarDecl *ThreadIDVar;
140   StringRef HelperName;
141 };
142 
143 /// API for captured statement code generation in OpenMP constructs.
144 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
145 public:
146   class UntiedTaskActionTy final : public PrePostActionTy {
147     bool Untied;
148     const VarDecl *PartIDVar;
149     const RegionCodeGenTy UntiedCodeGen;
150     llvm::SwitchInst *UntiedSwitch = nullptr;
151 
152   public:
153     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
154                        const RegionCodeGenTy &UntiedCodeGen)
155         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
156     void Enter(CodeGenFunction &CGF) override {
157       if (Untied) {
158         // Emit task switching point.
159         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
160             CGF.GetAddrOfLocalVar(PartIDVar),
161             PartIDVar->getType()->castAs<PointerType>());
162         llvm::Value *Res =
163             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
164         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
165         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
166         CGF.EmitBlock(DoneBB);
167         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
168         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
169         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
170                               CGF.Builder.GetInsertBlock());
171         emitUntiedSwitch(CGF);
172       }
173     }
174     void emitUntiedSwitch(CodeGenFunction &CGF) const {
175       if (Untied) {
176         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
177             CGF.GetAddrOfLocalVar(PartIDVar),
178             PartIDVar->getType()->castAs<PointerType>());
179         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
180                               PartIdLVal);
181         UntiedCodeGen(CGF);
182         CodeGenFunction::JumpDest CurPoint =
183             CGF.getJumpDestInCurrentScope(".untied.next.");
184         CGF.EmitBranch(CGF.ReturnBlock.getBlock());
185         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
186         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
187                               CGF.Builder.GetInsertBlock());
188         CGF.EmitBranchThroughCleanup(CurPoint);
189         CGF.EmitBlock(CurPoint.getBlock());
190       }
191     }
192     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
193   };
194   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
195                                  const VarDecl *ThreadIDVar,
196                                  const RegionCodeGenTy &CodeGen,
197                                  OpenMPDirectiveKind Kind, bool HasCancel,
198                                  const UntiedTaskActionTy &Action)
199       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
200         ThreadIDVar(ThreadIDVar), Action(Action) {
201     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
202   }
203 
204   /// Get a variable or parameter for storing global thread id
205   /// inside OpenMP construct.
206   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
207 
208   /// Get an LValue for the current ThreadID variable.
209   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
210 
211   /// Get the name of the capture helper.
212   StringRef getHelperName() const override { return ".omp_outlined."; }
213 
214   void emitUntiedSwitch(CodeGenFunction &CGF) override {
215     Action.emitUntiedSwitch(CGF);
216   }
217 
218   static bool classof(const CGCapturedStmtInfo *Info) {
219     return CGOpenMPRegionInfo::classof(Info) &&
220            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
221                TaskOutlinedRegion;
222   }
223 
224 private:
225   /// A variable or parameter storing global thread id for OpenMP
226   /// constructs.
227   const VarDecl *ThreadIDVar;
228   /// Action for emitting code for untied tasks.
229   const UntiedTaskActionTy &Action;
230 };
231 
232 /// API for inlined captured statement code generation in OpenMP
233 /// constructs.
234 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
235 public:
236   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
237                             const RegionCodeGenTy &CodeGen,
238                             OpenMPDirectiveKind Kind, bool HasCancel)
239       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
240         OldCSI(OldCSI),
241         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
242 
243   // Retrieve the value of the context parameter.
244   llvm::Value *getContextValue() const override {
245     if (OuterRegionInfo)
246       return OuterRegionInfo->getContextValue();
247     llvm_unreachable("No context value for inlined OpenMP region");
248   }
249 
250   void setContextValue(llvm::Value *V) override {
251     if (OuterRegionInfo) {
252       OuterRegionInfo->setContextValue(V);
253       return;
254     }
255     llvm_unreachable("No context value for inlined OpenMP region");
256   }
257 
258   /// Lookup the captured field decl for a variable.
259   const FieldDecl *lookup(const VarDecl *VD) const override {
260     if (OuterRegionInfo)
261       return OuterRegionInfo->lookup(VD);
262     // If there is no outer outlined region,no need to lookup in a list of
263     // captured variables, we can use the original one.
264     return nullptr;
265   }
266 
267   FieldDecl *getThisFieldDecl() const override {
268     if (OuterRegionInfo)
269       return OuterRegionInfo->getThisFieldDecl();
270     return nullptr;
271   }
272 
273   /// Get a variable or parameter for storing global thread id
274   /// inside OpenMP construct.
275   const VarDecl *getThreadIDVariable() const override {
276     if (OuterRegionInfo)
277       return OuterRegionInfo->getThreadIDVariable();
278     return nullptr;
279   }
280 
281   /// Get an LValue for the current ThreadID variable.
282   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
285     llvm_unreachable("No LValue for inlined OpenMP construct");
286   }
287 
288   /// Get the name of the capture helper.
289   StringRef getHelperName() const override {
290     if (auto *OuterRegionInfo = getOldCSI())
291       return OuterRegionInfo->getHelperName();
292     llvm_unreachable("No helper name for inlined OpenMP construct");
293   }
294 
295   void emitUntiedSwitch(CodeGenFunction &CGF) override {
296     if (OuterRegionInfo)
297       OuterRegionInfo->emitUntiedSwitch(CGF);
298   }
299 
300   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
301 
302   static bool classof(const CGCapturedStmtInfo *Info) {
303     return CGOpenMPRegionInfo::classof(Info) &&
304            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
305   }
306 
307   ~CGOpenMPInlinedRegionInfo() override = default;
308 
309 private:
310   /// CodeGen info about outer OpenMP region.
311   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
312   CGOpenMPRegionInfo *OuterRegionInfo;
313 };
314 
315 /// API for captured statement code generation in OpenMP target
316 /// constructs. For this captures, implicit parameters are used instead of the
317 /// captured fields. The name of the target region has to be unique in a given
318 /// application so it is provided by the client, because only the client has
319 /// the information to generate that.
320 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
321 public:
322   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
323                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
324       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
325                            /*HasCancel=*/false),
326         HelperName(HelperName) {}
327 
328   /// This is unused for target regions because each starts executing
329   /// with a single thread.
330   const VarDecl *getThreadIDVariable() const override { return nullptr; }
331 
332   /// Get the name of the capture helper.
333   StringRef getHelperName() const override { return HelperName; }
334 
335   static bool classof(const CGCapturedStmtInfo *Info) {
336     return CGOpenMPRegionInfo::classof(Info) &&
337            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
338   }
339 
340 private:
341   StringRef HelperName;
342 };
343 
344 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
345   llvm_unreachable("No codegen for expressions");
346 }
347 /// API for generation of expressions captured in a innermost OpenMP
348 /// region.
349 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
350 public:
351   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
352       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
353                                   OMPD_unknown,
354                                   /*HasCancel=*/false),
355         PrivScope(CGF) {
356     // Make sure the globals captured in the provided statement are local by
357     // using the privatization logic. We assume the same variable is not
358     // captured more than once.
359     for (const auto &C : CS.captures()) {
360       if (!C.capturesVariable() && !C.capturesVariableByCopy())
361         continue;
362 
363       const VarDecl *VD = C.getCapturedVar();
364       if (VD->isLocalVarDeclOrParm())
365         continue;
366 
367       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
368                       /*RefersToEnclosingVariableOrCapture=*/false,
369                       VD->getType().getNonReferenceType(), VK_LValue,
370                       C.getLocation());
371       PrivScope.addPrivate(
372           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
373     }
374     (void)PrivScope.Privatize();
375   }
376 
377   /// Lookup the captured field decl for a variable.
378   const FieldDecl *lookup(const VarDecl *VD) const override {
379     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
380       return FD;
381     return nullptr;
382   }
383 
384   /// Emit the captured statement body.
385   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
386     llvm_unreachable("No body for expressions");
387   }
388 
389   /// Get a variable or parameter for storing global thread id
390   /// inside OpenMP construct.
391   const VarDecl *getThreadIDVariable() const override {
392     llvm_unreachable("No thread id for expressions");
393   }
394 
395   /// Get the name of the capture helper.
396   StringRef getHelperName() const override {
397     llvm_unreachable("No helper name for expressions");
398   }
399 
400   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
401 
402 private:
403   /// Private scope to capture global variables.
404   CodeGenFunction::OMPPrivateScope PrivScope;
405 };
406 
407 /// RAII for emitting code of OpenMP constructs.
408 class InlinedOpenMPRegionRAII {
409   CodeGenFunction &CGF;
410   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
411   FieldDecl *LambdaThisCaptureField = nullptr;
412   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
413   bool NoInheritance = false;
414 
415 public:
416   /// Constructs region for combined constructs.
417   /// \param CodeGen Code generation sequence for combined directives. Includes
418   /// a list of functions used for code generation of implicitly inlined
419   /// regions.
420   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
421                           OpenMPDirectiveKind Kind, bool HasCancel,
422                           bool NoInheritance = true)
423       : CGF(CGF), NoInheritance(NoInheritance) {
424     // Start emission for the construct.
425     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
426         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
427     if (NoInheritance) {
428       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
429       LambdaThisCaptureField = CGF.LambdaThisCaptureField;
430       CGF.LambdaThisCaptureField = nullptr;
431       BlockInfo = CGF.BlockInfo;
432       CGF.BlockInfo = nullptr;
433     }
434   }
435 
436   ~InlinedOpenMPRegionRAII() {
437     // Restore original CapturedStmtInfo only if we're done with code emission.
438     auto *OldCSI =
439         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
440     delete CGF.CapturedStmtInfo;
441     CGF.CapturedStmtInfo = OldCSI;
442     if (NoInheritance) {
443       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
444       CGF.LambdaThisCaptureField = LambdaThisCaptureField;
445       CGF.BlockInfo = BlockInfo;
446     }
447   }
448 };
449 
450 /// Values for bit flags used in the ident_t to describe the fields.
451 /// All enumeric elements are named and described in accordance with the code
452 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
453 enum OpenMPLocationFlags : unsigned {
454   /// Use trampoline for internal microtask.
455   OMP_IDENT_IMD = 0x01,
456   /// Use c-style ident structure.
457   OMP_IDENT_KMPC = 0x02,
458   /// Atomic reduction option for kmpc_reduce.
459   OMP_ATOMIC_REDUCE = 0x10,
460   /// Explicit 'barrier' directive.
461   OMP_IDENT_BARRIER_EXPL = 0x20,
462   /// Implicit barrier in code.
463   OMP_IDENT_BARRIER_IMPL = 0x40,
464   /// Implicit barrier in 'for' directive.
465   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
466   /// Implicit barrier in 'sections' directive.
467   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
468   /// Implicit barrier in 'single' directive.
469   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
470   /// Call of __kmp_for_static_init for static loop.
471   OMP_IDENT_WORK_LOOP = 0x200,
472   /// Call of __kmp_for_static_init for sections.
473   OMP_IDENT_WORK_SECTIONS = 0x400,
474   /// Call of __kmp_for_static_init for distribute.
475   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
476   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
477 };
478 
479 namespace {
480 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
481 /// Values for bit flags for marking which requires clauses have been used.
482 enum OpenMPOffloadingRequiresDirFlags : int64_t {
483   /// flag undefined.
484   OMP_REQ_UNDEFINED               = 0x000,
485   /// no requires clause present.
486   OMP_REQ_NONE                    = 0x001,
487   /// reverse_offload clause.
488   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
489   /// unified_address clause.
490   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
491   /// unified_shared_memory clause.
492   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
493   /// dynamic_allocators clause.
494   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
495   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
496 };
497 
498 enum OpenMPOffloadingReservedDeviceIDs {
499   /// Device ID if the device was not defined, runtime should get it
500   /// from environment variables in the spec.
501   OMP_DEVICEID_UNDEF = -1,
502 };
503 } // anonymous namespace
504 
505 /// Describes ident structure that describes a source location.
506 /// All descriptions are taken from
507 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
508 /// Original structure:
509 /// typedef struct ident {
510 ///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
511 ///                                  see above  */
512 ///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
513 ///                                  KMP_IDENT_KMPC identifies this union
514 ///                                  member  */
515 ///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
516 ///                                  see above */
517 ///#if USE_ITT_BUILD
518 ///                            /*  but currently used for storing
519 ///                                region-specific ITT */
520 ///                            /*  contextual information. */
521 ///#endif /* USE_ITT_BUILD */
522 ///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
523 ///                                 C++  */
524 ///    char const *psource;    /**< String describing the source location.
525 ///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
527 ///                            the function and a pair of line numbers that
528 ///                            delimit the construct.
529 ///                             */
530 /// } ident_t;
/// Indices of the fields of the ident structure, in declaration order.
enum IdentFieldIndex {
  /// Might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// Not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// Source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
545 
/// Schedule types for 'omp for' loops. The enumerators mirror the enum
/// sched_type in kmp.h.
enum OpenMPSchedType {
  // Default (unordered) versions.
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  /// Schedule used by default.
  OMP_sch_default = OMP_sch_static,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,

  // 'ordered' versions.
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  // dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
577 
578 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
579 /// region.
580 class CleanupTy final : public EHScopeStack::Cleanup {
581   PrePostActionTy *Action;
582 
583 public:
584   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
585   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
586     if (!CGF.HaveInsertPoint())
587       return;
588     Action->Exit(CGF);
589   }
590 };
591 
592 } // anonymous namespace
593 
594 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
595   CodeGenFunction::RunCleanupsScope Scope(CGF);
596   if (PrePostAction) {
597     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
598     Callback(CodeGen, CGF, *PrePostAction);
599   } else {
600     PrePostActionTy Action;
601     Callback(CodeGen, CGF, Action);
602   }
603 }
604 
605 /// Check if the combiner is a call to UDR combiner and if it is so return the
606 /// UDR decl used for reduction.
607 static const OMPDeclareReductionDecl *
608 getReductionInit(const Expr *ReductionOp) {
609   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
610     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
611       if (const auto *DRE =
612               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
613         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
614           return DRD;
615   return nullptr;
616 }
617 
618 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
619                                              const OMPDeclareReductionDecl *DRD,
620                                              const Expr *InitOp,
621                                              Address Private, Address Original,
622                                              QualType Ty) {
623   if (DRD->getInitializer()) {
624     std::pair<llvm::Function *, llvm::Function *> Reduction =
625         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
626     const auto *CE = cast<CallExpr>(InitOp);
627     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
628     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
629     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
630     const auto *LHSDRE =
631         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
632     const auto *RHSDRE =
633         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
634     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
635     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
636                             [=]() { return Private; });
637     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
638                             [=]() { return Original; });
639     (void)PrivateScope.Privatize();
640     RValue Func = RValue::get(Reduction.second);
641     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
642     CGF.EmitIgnoredExpr(InitOp);
643   } else {
644     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
645     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
646     auto *GV = new llvm::GlobalVariable(
647         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
648         llvm::GlobalValue::PrivateLinkage, Init, Name);
649     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
650     RValue InitRVal;
651     switch (CGF.getEvaluationKind(Ty)) {
652     case TEK_Scalar:
653       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
654       break;
655     case TEK_Complex:
656       InitRVal =
657           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
658       break;
659     case TEK_Aggregate: {
660       OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
661       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
662       CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
663                            /*IsInitializer=*/false);
664       return;
665     }
666     }
667     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
668     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
669     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
670                          /*IsInitializer=*/false);
671   }
672 }
673 
674 /// Emit initialization of arrays of complex types.
675 /// \param DestAddr Address of the array.
676 /// \param Type Type of array.
677 /// \param Init Initial expression of array.
678 /// \param SrcAddr Address of the original array.
679 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
680                                  QualType Type, bool EmitDeclareReductionInit,
681                                  const Expr *Init,
682                                  const OMPDeclareReductionDecl *DRD,
683                                  Address SrcAddr = Address::invalid()) {
684   // Perform element-by-element initialization.
685   QualType ElementTy;
686 
687   // Drill down to the base element type on both arrays.
688   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
689   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
690   DestAddr =
691       CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
692   if (DRD)
693     SrcAddr =
694         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
695 
696   llvm::Value *SrcBegin = nullptr;
697   if (DRD)
698     SrcBegin = SrcAddr.getPointer();
699   llvm::Value *DestBegin = DestAddr.getPointer();
700   // Cast from pointer to array type to pointer to single element.
701   llvm::Value *DestEnd =
702       CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
703   // The basic structure here is a while-do loop.
704   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
705   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
706   llvm::Value *IsEmpty =
707       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
708   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
709 
710   // Enter the loop body, making that address the current address.
711   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
712   CGF.EmitBlock(BodyBB);
713 
714   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
715 
716   llvm::PHINode *SrcElementPHI = nullptr;
717   Address SrcElementCurrent = Address::invalid();
718   if (DRD) {
719     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
720                                           "omp.arraycpy.srcElementPast");
721     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
722     SrcElementCurrent =
723         Address(SrcElementPHI,
724                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
725   }
726   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
727       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
728   DestElementPHI->addIncoming(DestBegin, EntryBB);
729   Address DestElementCurrent =
730       Address(DestElementPHI,
731               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
732 
733   // Emit copy.
734   {
735     CodeGenFunction::RunCleanupsScope InitScope(CGF);
736     if (EmitDeclareReductionInit) {
737       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
738                                        SrcElementCurrent, ElementTy);
739     } else
740       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
741                            /*IsInitializer=*/false);
742   }
743 
744   if (DRD) {
745     // Shift the address forward by one element.
746     llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
747         SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
748         "omp.arraycpy.dest.element");
749     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
750   }
751 
752   // Shift the address forward by one element.
753   llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
754       DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
755       "omp.arraycpy.dest.element");
756   // Check whether we've reached the end.
757   llvm::Value *Done =
758       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
759   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
760   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
761 
762   // Done.
763   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
764 }
765 
766 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
767   return CGF.EmitOMPSharedLValue(E);
768 }
769 
770 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
771                                             const Expr *E) {
772   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
773     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
774   return LValue();
775 }
776 
777 void ReductionCodeGen::emitAggregateInitialization(
778     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
779     const OMPDeclareReductionDecl *DRD) {
780   // Emit VarDecl with copy init for arrays.
781   // Get the address of the original variable captured in current
782   // captured region.
783   const auto *PrivateVD =
784       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
785   bool EmitDeclareReductionInit =
786       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
787   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
788                        EmitDeclareReductionInit,
789                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
790                                                 : PrivateVD->getInit(),
791                        DRD, SharedLVal.getAddress(CGF));
792 }
793 
794 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
795                                    ArrayRef<const Expr *> Origs,
796                                    ArrayRef<const Expr *> Privates,
797                                    ArrayRef<const Expr *> ReductionOps) {
798   ClausesData.reserve(Shareds.size());
799   SharedAddresses.reserve(Shareds.size());
800   Sizes.reserve(Shareds.size());
801   BaseDecls.reserve(Shareds.size());
802   const auto *IOrig = Origs.begin();
803   const auto *IPriv = Privates.begin();
804   const auto *IRed = ReductionOps.begin();
805   for (const Expr *Ref : Shareds) {
806     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
807     std::advance(IOrig, 1);
808     std::advance(IPriv, 1);
809     std::advance(IRed, 1);
810   }
811 }
812 
813 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
814   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
815          "Number of generated lvalues must be exactly N.");
816   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
817   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
818   SharedAddresses.emplace_back(First, Second);
819   if (ClausesData[N].Shared == ClausesData[N].Ref) {
820     OrigAddresses.emplace_back(First, Second);
821   } else {
822     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
823     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
824     OrigAddresses.emplace_back(First, Second);
825   }
826 }
827 
/// Computes and records the size information of reduction item \p N, and emits
/// the private type if it is variably modified.
///
/// An entry (SizeInChars, Size) is appended to Sizes. For a private type that
/// is not variably modified only the type size of the original item is stored
/// and the second element is null. Otherwise the element count is bound to the
/// size expression of the private variable-length array type while that type
/// is emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Fixed-size item: only the type size is needed, no element count.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count is (second - first) + 1, where the pair holds the two
    // lvalues recorded for the original expression; the total size is the
    // count multiplied by the element size.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Not an array section: start from the type size and derive the element
    // count by an exact division by the element size.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Map the opaque size expression of the private VLA type to the computed
  // element count; the mapping must stay alive while the type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
864 
865 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
866                                          llvm::Value *Size) {
867   const auto *PrivateVD =
868       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
869   QualType PrivateType = PrivateVD->getType();
870   if (!PrivateType->isVariablyModifiedType()) {
871     assert(!Size && !Sizes[N].second &&
872            "Size should be nullptr for non-variably modified reduction "
873            "items.");
874     return;
875   }
876   CodeGenFunction::OpaqueValueMapping OpaqueMap(
877       CGF,
878       cast<OpaqueValueExpr>(
879           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
880       RValue::get(Size));
881   CGF.EmitVariablyModifiedType(PrivateType);
882 }
883 
884 void ReductionCodeGen::emitInitialization(
885     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
886     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
887   assert(SharedAddresses.size() > N && "No variable was generated");
888   const auto *PrivateVD =
889       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
890   const OMPDeclareReductionDecl *DRD =
891       getReductionInit(ClausesData[N].ReductionOp);
892   QualType PrivateType = PrivateVD->getType();
893   PrivateAddr = CGF.Builder.CreateElementBitCast(
894       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
895   QualType SharedType = SharedAddresses[N].first.getType();
896   SharedLVal = CGF.MakeAddrLValue(
897       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
898                                        CGF.ConvertTypeForMem(SharedType)),
899       SharedType, SharedAddresses[N].first.getBaseInfo(),
900       CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
901   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
902     if (DRD && DRD->getInitializer())
903       (void)DefaultInit(CGF);
904     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
905   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
906     (void)DefaultInit(CGF);
907     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
908                                      PrivateAddr, SharedLVal.getAddress(CGF),
909                                      SharedLVal.getType());
910   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
911              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
912     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
913                          PrivateVD->getType().getQualifiers(),
914                          /*IsInitializer=*/false);
915   }
916 }
917 
918 bool ReductionCodeGen::needCleanups(unsigned N) {
919   const auto *PrivateVD =
920       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
921   QualType PrivateType = PrivateVD->getType();
922   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
923   return DTorKind != QualType::DK_none;
924 }
925 
926 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
927                                     Address PrivateAddr) {
928   const auto *PrivateVD =
929       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
930   QualType PrivateType = PrivateVD->getType();
931   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
932   if (needCleanups(N)) {
933     PrivateAddr = CGF.Builder.CreateElementBitCast(
934         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
935     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
936   }
937 }
938 
939 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
940                           LValue BaseLV) {
941   BaseTy = BaseTy.getNonReferenceType();
942   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
943          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
944     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
945       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
946     } else {
947       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
948       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
949     }
950     BaseTy = BaseTy->getPointeeType();
951   }
952   return CGF.MakeAddrLValue(
953       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
954                                        CGF.ConvertTypeForMem(ElTy)),
955       BaseLV.getType(), BaseLV.getBaseInfo(),
956       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
957 }
958 
/// Wraps \p Addr in as many memory temporaries as \p BaseTy has
/// pointer/reference levels above \p ElTy and returns the outermost one.
///
/// One temporary is created per level; each newly created temporary's pointer
/// is stored into the previously created one. \p Addr is finally cast to the
/// element type of the innermost temporary and stored there. If \p BaseTy has
/// no such level, \p Addr is simply cast to \p BaseLVType and returned with
/// alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Tmp: most recently created (innermost) temporary.
  // TopTmp: temporary created in the previous iteration.
  // MostTopTmp: first (outermost) temporary, which is the one returned.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      // Link the previous level to the temporary of this level.
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast the address to whatever the destination slot expects.
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Terminate the chain with the real address and hand back its head.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
986 
987 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
988   const VarDecl *OrigVD = nullptr;
989   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
990     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
991     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
992       Base = TempOASE->getBase()->IgnoreParenImpCasts();
993     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
994       Base = TempASE->getBase()->IgnoreParenImpCasts();
995     DE = cast<DeclRefExpr>(Base);
996     OrigVD = cast<VarDecl>(DE->getDecl());
997   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
998     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
999     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1000       Base = TempASE->getBase()->IgnoreParenImpCasts();
1001     DE = cast<DeclRefExpr>(Base);
1002     OrigVD = cast<VarDecl>(DE->getDecl());
1003   }
1004   return OrigVD;
1005 }
1006 
/// Records the base declaration of reduction item \p N and returns the address
/// to use for its private copy.
///
/// When the item is an array section or array subscript (getBaseDecl finds a
/// base variable), the private pointer is offset by the pointer difference
/// between the beginning of the base and the shared address, and the result is
/// wrapped by castToBase. Otherwise the item must be a plain DeclRefExpr and
/// \p PrivateAddr is returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  // Only written by getBaseDecl when it returns a non-null declaration.
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Adjustment = base begin - shared address, applied to the private
    // pointer below.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no adjustment is needed.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1033 
1034 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1035   const OMPDeclareReductionDecl *DRD =
1036       getReductionInit(ClausesData[N].ReductionOp);
1037   return DRD && DRD->getInitializer();
1038 }
1039 
1040 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1041   return CGF.EmitLoadOfPointerLValue(
1042       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1043       getThreadIDVariable()->getType()->castAs<PointerType>());
1044 }
1045 
1046 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1047   if (!CGF.HaveInsertPoint())
1048     return;
1049   // 1.2.2 OpenMP Language Terminology
1050   // Structured block - An executable statement with a single entry at the
1051   // top and a single exit at the bottom.
1052   // The point of exit cannot be a branch out of the structured block.
1053   // longjmp() and throw() must not violate the entry/exit criteria.
1054   CGF.EHStack.pushTerminate();
1055   if (S)
1056     CGF.incrementProfileCounter(S);
1057   CodeGen(CGF);
1058   CGF.EHStack.popTerminate();
1059 }
1060 
1061 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1062     CodeGenFunction &CGF) {
1063   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1064                             getThreadIDVariable()->getType(),
1065                             AlignmentSource::Decl);
1066 }
1067 
1068 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1069                                        QualType FieldTy) {
1070   auto *Field = FieldDecl::Create(
1071       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1072       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1073       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1074   Field->setAccess(AS_public);
1075   DC->addDecl(Field);
1076   return Field;
1077 }
1078 
1079 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1080                                  StringRef Separator)
1081     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1082       OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
1083   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1084 
1085   // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1086   OMPBuilder.initialize();
1087   loadOffloadInfoMetadata();
1088 }
1089 
1090 void CGOpenMPRuntime::clear() {
1091   InternalVars.clear();
1092   // Clean non-target variable declarations possibly used only in debug info.
1093   for (const auto &Data : EmittedNonTargetVariables) {
1094     if (!Data.getValue().pointsToAliveValue())
1095       continue;
1096     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1097     if (!GV)
1098       continue;
1099     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1100       continue;
1101     GV->eraseFromParent();
1102   }
1103 }
1104 
1105 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1106   SmallString<128> Buffer;
1107   llvm::raw_svector_ostream OS(Buffer);
1108   StringRef Sep = FirstSeparator;
1109   for (StringRef Part : Parts) {
1110     OS << Sep << Part;
1111     Sep = Separator;
1112   }
1113   return std::string(OS.str());
1114 }
1115 
/// Emits the internal helper function implementing either the combiner or the
/// initializer of a user-defined reduction.
///
/// \param Ty                  Type the reduction operates on; both parameters
///                            of the helper are restrict-qualified pointers to
///                            it.
/// \param CombinerInitializer Expression emitted (and its value ignored) in
///                            the helper body; may be null.
/// \param In                  Variable that is mapped to the pointee of the
///                            second helper parameter.
/// \param Out                 Variable that is mapped to the pointee of the
///                            first helper parameter.
/// \param IsCombiner          Selects the helper name and, when false, enables
///                            emission of \p Out's own non-trivial initializer.
/// \returns The created function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  // Note: the 'out' parameter is pushed first, the 'in' parameter second.
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // With optimizations enabled the helper is forced to be inlined.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For an initializer helper, first emit the non-trivial initializer attached
  // to the 'Out' variable itself, if there is one.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  // Run the scope's cleanups before the function is finished.
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1172 
1173 void CGOpenMPRuntime::emitUserDefinedReduction(
1174     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1175   if (UDRMap.count(D) > 0)
1176     return;
1177   llvm::Function *Combiner = emitCombinerOrInitializer(
1178       CGM, D->getType(), D->getCombiner(),
1179       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1180       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1181       /*IsCombiner=*/true);
1182   llvm::Function *Initializer = nullptr;
1183   if (const Expr *Init = D->getInitializer()) {
1184     Initializer = emitCombinerOrInitializer(
1185         CGM, D->getType(),
1186         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1187                                                                      : nullptr,
1188         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1189         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1190         /*IsCombiner=*/false);
1191   }
1192   UDRMap.try_emplace(D, Combiner, Initializer);
1193   if (CGF) {
1194     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1195     Decls.second.push_back(D);
1196   }
1197 }
1198 
1199 std::pair<llvm::Function *, llvm::Function *>
1200 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1201   auto I = UDRMap.find(D);
1202   if (I != UDRMap.end())
1203     return I->second;
1204   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1205   return UDRMap.lookup(D);
1206 }
1207 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
//
// The constructor pushes a finalization callback onto the given builder and
// the destructor pops it again; both are no-ops when the builder pointer is
// null.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    //
    // The callback captures CGF by reference, so CGF has to outlive this
    // object.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Temporarily move the builder to IP; the guard restores the previous
      // insertion point when the callback returns.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      // NOTE(review): the destination is always looked up for OMPD_parallel,
      // independent of Kind - confirm this is intended.
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Builder the callback was pushed to; null means nothing was pushed.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1252 
1253 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1254     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1255     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1256     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1257   assert(ThreadIDVar->getType()->isPointerType() &&
1258          "thread id variable must be of type kmp_int32 *");
1259   CodeGenFunction CGF(CGM, true);
1260   bool HasCancel = false;
1261   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1262     HasCancel = OPD->hasCancel();
1263   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1264     HasCancel = OPD->hasCancel();
1265   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1266     HasCancel = OPSD->hasCancel();
1267   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1268     HasCancel = OPFD->hasCancel();
1269   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1270     HasCancel = OPFD->hasCancel();
1271   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1272     HasCancel = OPFD->hasCancel();
1273   else if (const auto *OPFD =
1274                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1275     HasCancel = OPFD->hasCancel();
1276   else if (const auto *OPFD =
1277                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1278     HasCancel = OPFD->hasCancel();
1279 
1280   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1281   //       parallel region to make cancellation barriers work properly.
1282   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1283   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1284   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1285                                     HasCancel, OutlinedHelperName);
1286   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1287   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1288 }
1289 
1290 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1291     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1292     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1293   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1294   return emitParallelOrTeamsOutlinedFunction(
1295       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1296 }
1297 
1298 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1299     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1300     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1301   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1302   return emitParallelOrTeamsOutlinedFunction(
1303       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1304 }
1305 
/// Generates the outlined function for a task or taskloop region of directive
/// \p D.
///
/// \param ThreadIDVar   Thread id variable; must NOT have pointer type.
/// \param PartIDVar     Variable handed to the untied-task action.
/// \param TaskTVar      Pointer variable whose pointee is passed as the last
///                      argument of the __kmpc_omp_task call emitted by the
///                      untied-task callback.
/// \param Tied          Whether the task is tied.
/// \param NumberOfParts Only written for untied tasks; receives the number of
///                      parts reported by the untied-task action.
/// \returns The generated captured-statement function.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Callback given to the untied-task action: emits a call to
  // __kmpc_omp_task(loc, thread id, task).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop-based directives capture under OMPD_taskloop, everything else
  // under OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only the directive kinds below are queried for a cancel flag.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only reported for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1352 
1353 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1354                              const RecordDecl *RD, const CGRecordLayout &RL,
1355                              ArrayRef<llvm::Constant *> Data) {
1356   llvm::StructType *StructTy = RL.getLLVMType();
1357   unsigned PrevIdx = 0;
1358   ConstantInitBuilder CIBuilder(CGM);
1359   auto DI = Data.begin();
1360   for (const FieldDecl *FD : RD->fields()) {
1361     unsigned Idx = RL.getLLVMFieldNo(FD);
1362     // Fill the alignment.
1363     for (unsigned I = PrevIdx; I < Idx; ++I)
1364       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1365     PrevIdx = Idx + 1;
1366     Fields.add(*DI);
1367     ++DI;
1368   }
1369 }
1370 
1371 template <class... As>
1372 static llvm::GlobalVariable *
1373 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1374                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1375                    As &&... Args) {
1376   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1377   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1378   ConstantInitBuilder CIBuilder(CGM);
1379   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1380   buildStructValue(Fields, CGM, RD, RL, Data);
1381   return Fields.finishAndCreateGlobal(
1382       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1383       std::forward<As>(Args)...);
1384 }
1385 
1386 template <typename T>
1387 static void
1388 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1389                                          ArrayRef<llvm::Constant *> Data,
1390                                          T &Parent) {
1391   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1392   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1393   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1394   buildStructValue(Fields, CGM, RD, RL, Data);
1395   Fields.finishAndAddTo(Parent);
1396 }
1397 
1398 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1399                                              bool AtCurrentPoint) {
1400   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1401   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1402 
1403   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1404   if (AtCurrentPoint) {
1405     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1406         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1407   } else {
1408     Elem.second.ServiceInsertPt =
1409         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1410     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1411   }
1412 }
1413 
1414 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1415   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1416   if (Elem.second.ServiceInsertPt) {
1417     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1418     Elem.second.ServiceInsertPt = nullptr;
1419     Ptr->eraseFromParent();
1420   }
1421 }
1422 
1423 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1424                                                   SourceLocation Loc,
1425                                                   SmallString<128> &Buffer) {
1426   llvm::raw_svector_ostream OS(Buffer);
1427   // Build debug location
1428   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1429   OS << ";" << PLoc.getFilename() << ";";
1430   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1431     OS << FD->getQualifiedNameAsString();
1432   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1433   return OS.str();
1434 }
1435 
1436 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1437                                                  SourceLocation Loc,
1438                                                  unsigned Flags) {
1439   llvm::Constant *SrcLocStr;
1440   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1441       Loc.isInvalid()) {
1442     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1443   } else {
1444     std::string FunctionName = "";
1445     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1446       FunctionName = FD->getQualifiedNameAsString();
1447     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1448     const char *FileName = PLoc.getFilename();
1449     unsigned Line = PLoc.getLine();
1450     unsigned Column = PLoc.getColumn();
1451     SrcLocStr =
1452         OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column);
1453   }
1454   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1455   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1456                                      Reserved2Flags);
1457 }
1458 
/// Return the OpenMP global thread id to use at the current insertion point.
///
/// Sources, in the order they are tried:
///  - the OpenMPIRBuilder, when LangOpts.OpenMPIRBuilder is set;
///  - the value cached for CGF.CurFn in OpenMPLocThreadIDMap;
///  - a load of the region's thread-id variable, when the current captured
///    statement info is an OpenMP region that has one and the conditions
///    checked below hold;
///  - a call to __kmpc_global_thread_num emitted at the function's service
///    insertion point, which is cached for later requests in this function.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // The load is emitted here if any of the following holds: no landing
      // pad is required, C++ exceptions are off, we are inserting into the
      // entry block, the thread-id pointer is not an instruction, or that
      // instruction lives in the entry block or in the current insert block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // The thread id could not be taken from an outlined region's thread-id
  // variable - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  // Lazily create the service insertion point for this function.
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insertion point; the guard restores the
  // builder's previous insertion point on scope exit.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1526 
1527 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1528   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1529   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1530     clearLocThreadIdInsertPt(CGF);
1531     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1532   }
1533   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1534     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1535       UDRMap.erase(D);
1536     FunctionUDRMap.erase(CGF.CurFn);
1537   }
1538   auto I = FunctionUDMMap.find(CGF.CurFn);
1539   if (I != FunctionUDMMap.end()) {
1540     for(const auto *D : I->second)
1541       UDMMap.erase(D);
1542     FunctionUDMMap.erase(I);
1543   }
1544   LastprivateConditionalToTypes.erase(CGF.CurFn);
1545   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1546 }
1547 
/// Return the ident pointer type held by the OpenMP IR builder
/// (OMPBuilder.IdentPtr).
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1551 
1552 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1553   if (!Kmpc_MicroTy) {
1554     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1555     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1556                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1557     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1558   }
1559   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1560 }
1561 
1562 llvm::FunctionCallee
1563 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1564   assert((IVSize == 32 || IVSize == 64) &&
1565          "IV size is not compatible with the omp runtime");
1566   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1567                                             : "__kmpc_for_static_init_4u")
1568                                 : (IVSigned ? "__kmpc_for_static_init_8"
1569                                             : "__kmpc_for_static_init_8u");
1570   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1571   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1572   llvm::Type *TypeParams[] = {
1573     getIdentTyPointerTy(),                     // loc
1574     CGM.Int32Ty,                               // tid
1575     CGM.Int32Ty,                               // schedtype
1576     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1577     PtrTy,                                     // p_lower
1578     PtrTy,                                     // p_upper
1579     PtrTy,                                     // p_stride
1580     ITy,                                       // incr
1581     ITy                                        // chunk
1582   };
1583   auto *FnTy =
1584       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1585   return CGM.CreateRuntimeFunction(FnTy, Name);
1586 }
1587 
1588 llvm::FunctionCallee
1589 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1590   assert((IVSize == 32 || IVSize == 64) &&
1591          "IV size is not compatible with the omp runtime");
1592   StringRef Name =
1593       IVSize == 32
1594           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1595           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1596   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1597   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1598                                CGM.Int32Ty,           // tid
1599                                CGM.Int32Ty,           // schedtype
1600                                ITy,                   // lower
1601                                ITy,                   // upper
1602                                ITy,                   // stride
1603                                ITy                    // chunk
1604   };
1605   auto *FnTy =
1606       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1607   return CGM.CreateRuntimeFunction(FnTy, Name);
1608 }
1609 
1610 llvm::FunctionCallee
1611 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1612   assert((IVSize == 32 || IVSize == 64) &&
1613          "IV size is not compatible with the omp runtime");
1614   StringRef Name =
1615       IVSize == 32
1616           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1617           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1618   llvm::Type *TypeParams[] = {
1619       getIdentTyPointerTy(), // loc
1620       CGM.Int32Ty,           // tid
1621   };
1622   auto *FnTy =
1623       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1624   return CGM.CreateRuntimeFunction(FnTy, Name);
1625 }
1626 
1627 llvm::FunctionCallee
1628 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1629   assert((IVSize == 32 || IVSize == 64) &&
1630          "IV size is not compatible with the omp runtime");
1631   StringRef Name =
1632       IVSize == 32
1633           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1634           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1635   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1636   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1637   llvm::Type *TypeParams[] = {
1638     getIdentTyPointerTy(),                     // loc
1639     CGM.Int32Ty,                               // tid
1640     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1641     PtrTy,                                     // p_lower
1642     PtrTy,                                     // p_upper
1643     PtrTy                                      // p_stride
1644   };
1645   auto *FnTy =
1646       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1647   return CGM.CreateRuntimeFunction(FnTy, Name);
1648 }
1649 
1650 /// Obtain information that uniquely identifies a target entry. This
1651 /// consists of the file and device IDs as well as line number associated with
1652 /// the relevant entry source location.
1653 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1654                                      unsigned &DeviceID, unsigned &FileID,
1655                                      unsigned &LineNum) {
1656   SourceManager &SM = C.getSourceManager();
1657 
1658   // The loc should be always valid and have a file ID (the user cannot use
1659   // #pragma directives in macros)
1660 
1661   assert(Loc.isValid() && "Source location is expected to be always valid.");
1662 
1663   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1664   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1665 
1666   llvm::sys::fs::UniqueID ID;
1667   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1668     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1669     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1670     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1671       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1672           << PLoc.getFilename() << EC.message();
1673   }
1674 
1675   DeviceID = ID.getDevice();
1676   FileID = ID.getFile();
1677   LineNum = PLoc.getLine();
1678 }
1679 
1680 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1681   if (CGM.getLangOpts().OpenMPSimd)
1682     return Address::invalid();
1683   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1684       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1685   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1686               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1687                HasRequiresUnifiedSharedMemory))) {
1688     SmallString<64> PtrName;
1689     {
1690       llvm::raw_svector_ostream OS(PtrName);
1691       OS << CGM.getMangledName(GlobalDecl(VD));
1692       if (!VD->isExternallyVisible()) {
1693         unsigned DeviceID, FileID, Line;
1694         getTargetEntryUniqueInfo(CGM.getContext(),
1695                                  VD->getCanonicalDecl()->getBeginLoc(),
1696                                  DeviceID, FileID, Line);
1697         OS << llvm::format("_%x", FileID);
1698       }
1699       OS << "_decl_tgt_ref_ptr";
1700     }
1701     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1702     if (!Ptr) {
1703       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1704       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1705                                         PtrName);
1706 
1707       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1708       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1709 
1710       if (!CGM.getLangOpts().OpenMPIsDevice)
1711         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1712       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1713     }
1714     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1715   }
1716   return Address::invalid();
1717 }
1718 
1719 llvm::Constant *
1720 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1721   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1722          !CGM.getContext().getTargetInfo().isTLSSupported());
1723   // Lookup the entry, lazily creating it if necessary.
1724   std::string Suffix = getName({"cache", ""});
1725   return getOrCreateInternalVariable(
1726       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1727 }
1728 
1729 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1730                                                 const VarDecl *VD,
1731                                                 Address VDAddr,
1732                                                 SourceLocation Loc) {
1733   if (CGM.getLangOpts().OpenMPUseTLS &&
1734       CGM.getContext().getTargetInfo().isTLSSupported())
1735     return VDAddr;
1736 
1737   llvm::Type *VarTy = VDAddr.getElementType();
1738   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1739                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1740                                                        CGM.Int8PtrTy),
1741                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1742                          getOrCreateThreadPrivateCache(VD)};
1743   return Address(CGF.EmitRuntimeCall(
1744                      OMPBuilder.getOrCreateRuntimeFunction(
1745                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1746                      Args),
1747                  VDAddr.getAlignment());
1748 }
1749 
1750 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1751     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1752     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1753   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1754   // library.
1755   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1756   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1757                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1758                       OMPLoc);
1759   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1760   // to register constructor/destructor for variable.
1761   llvm::Value *Args[] = {
1762       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1763       Ctor, CopyCtor, Dtor};
1764   CGF.EmitRuntimeCall(
1765       OMPBuilder.getOrCreateRuntimeFunction(
1766           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1767       Args);
1768 }
1769 
/// Emit the runtime registration for the threadprivate variable \p VD located
/// at \p VDAddr.
///
/// Nothing is emitted and nullptr is returned when TLS is requested and
/// supported, when \p VD has no definition, when the variable's mangled name
/// was already recorded in ThreadPrivateWithDefinition, or when neither a
/// constructor nor a destructor helper is needed.
///
/// \param PerformInit in C++, whether to generate a helper that re-emits the
/// variable's initializer into the threadprivate copy.
/// \param CGF if non-null, the registration calls are emitted into this
/// function and nullptr is returned; if null, a separate initialization
/// function is created, filled with the registration calls and returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Handle each variable only once, keyed by its mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    // NOTE(review): Init is dereferenced below without a null check;
    // presumably PerformInit is only set when VD has an initializer - confirm.
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The helper has the signature void *ctor(void *).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer and initialize the object it points to.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the argument and store it as the helper's return value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The helper has the signature void dtor(void *).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // A helper that was not generated is passed as a typed null pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function was supplied: emit the registration into a new
      // nullary void function and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the caller's function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1889 
/// Create and register the offload constructor/destructor entries for the
/// declare target variable \p VD whose global is \p Addr.
///
/// On the device, real "_ctor"/"_dtor" functions operating on \p Addr are
/// generated; on the host, private constant i8 globals with the same names
/// are created instead. Either way the entry is registered with
/// OffloadEntriesInfoManager.
///
/// \param PerformInit in C++, whether a constructor entry should be created.
/// \returns false when no offload target triples are set and this is not a
/// device compilation; otherwise whether this is a device compilation
/// (LangOpts.OpenMPIsDevice).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Nothing to emit for non declare target variables, for 'link' entries and
  // for 'to' entries under the unified-shared-memory requirement.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Handle each variable only once, keyed by its mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    // NOTE(review): the literal ends in '_' and the device format starts with
    // '_', so the name begins "__omp_offloading__<device>"; presumably this
    // doubled underscore is relied upon elsewhere - confirm before changing.
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  // NOTE(review): Init is dereferenced in the device path below without a
  // null check; presumably PerformInit implies an initializer - confirm.
  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the variable at Addr.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Mark the function as used.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: a private constant i8 zero serves as the entry and its ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the variable at
      // Addr.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Mark the function as used.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: a private constant i8 zero serves as the entry and its ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2004 
2005 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2006                                                           QualType VarType,
2007                                                           StringRef Name) {
2008   std::string Suffix = getName({"artificial", ""});
2009   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2010   llvm::Value *GAddr =
2011       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2012   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
2013       CGM.getTarget().isTLSSupported()) {
2014     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
2015     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
2016   }
2017   std::string CacheSuffix = getName({"cache", ""});
2018   llvm::Value *Args[] = {
2019       emitUpdateLocation(CGF, SourceLocation()),
2020       getThreadID(CGF, SourceLocation()),
2021       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2022       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2023                                 /*isSigned=*/false),
2024       getOrCreateInternalVariable(
2025           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2026   return Address(
2027       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2028           CGF.EmitRuntimeCall(
2029               OMPBuilder.getOrCreateRuntimeFunction(
2030                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2031               Args),
2032           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2033       CGM.getContext().getTypeAlignInChars(VarType));
2034 }
2035 
2036 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2037                                    const RegionCodeGenTy &ThenGen,
2038                                    const RegionCodeGenTy &ElseGen) {
2039   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2040 
2041   // If the condition constant folds and can be elided, try to avoid emitting
2042   // the condition and the dead arm of the if/else.
2043   bool CondConstant;
2044   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2045     if (CondConstant)
2046       ThenGen(CGF);
2047     else
2048       ElseGen(CGF);
2049     return;
2050   }
2051 
2052   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2053   // emit the conditional branch.
2054   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2055   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2056   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2057   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2058 
2059   // Emit the 'then' code.
2060   CGF.EmitBlock(ThenBlock);
2061   ThenGen(CGF);
2062   CGF.EmitBranch(ContBlock);
2063   // Emit the 'else' code if present.
2064   // There is no need to emit line number for unconditional branch.
2065   (void)ApplyDebugLocation::CreateEmpty(CGF);
2066   CGF.EmitBlock(ElseBlock);
2067   ElseGen(CGF);
2068   // There is no need to emit line number for unconditional branch.
2069   (void)ApplyDebugLocation::CreateEmpty(CGF);
2070   CGF.EmitBranch(ContBlock);
2071   // Emit the continuation block for code after the if.
2072   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2073 }
2074 
2075 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2076                                        llvm::Function *OutlinedFn,
2077                                        ArrayRef<llvm::Value *> CapturedVars,
2078                                        const Expr *IfCond) {
2079   if (!CGF.HaveInsertPoint())
2080     return;
2081   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2082   auto &M = CGM.getModule();
2083   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2084                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2085     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2086     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2087     llvm::Value *Args[] = {
2088         RTLoc,
2089         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2090         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2091     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2092     RealArgs.append(std::begin(Args), std::end(Args));
2093     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2094 
2095     llvm::FunctionCallee RTLFn =
2096         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2097     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2098   };
2099   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2100                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2101     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2102     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2103     // Build calls:
2104     // __kmpc_serialized_parallel(&Loc, GTid);
2105     llvm::Value *Args[] = {RTLoc, ThreadID};
2106     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2107                             M, OMPRTL___kmpc_serialized_parallel),
2108                         Args);
2109 
2110     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2111     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2112     Address ZeroAddrBound =
2113         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2114                                          /*Name=*/".bound.zero.addr");
2115     CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
2116     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2117     // ThreadId for serialized parallels is 0.
2118     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2119     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2120     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2121 
2122     // Ensure we do not inline the function. This is trivially true for the ones
2123     // passed to __kmpc_fork_call but the ones called in serialized regions
2124     // could be inlined. This is not a perfect but it is closer to the invariant
2125     // we want, namely, every data environment starts with a new function.
2126     // TODO: We should pass the if condition to the runtime function and do the
2127     //       handling there. Much cleaner code.
2128     OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2129     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2130     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2131 
2132     // __kmpc_end_serialized_parallel(&Loc, GTid);
2133     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2134     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2135                             M, OMPRTL___kmpc_end_serialized_parallel),
2136                         EndArgs);
2137   };
2138   if (IfCond) {
2139     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2140   } else {
2141     RegionCodeGenTy ThenRCG(ThenGen);
2142     ThenRCG(CGF);
2143   }
2144 }
2145 
2146 // If we're inside an (outlined) parallel region, use the region info's
2147 // thread-ID variable (it is passed in a first argument of the outlined function
2148 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2149 // regular serial code region, get thread ID by calling kmp_int32
2150 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2151 // return the address of that temp.
2152 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2153                                              SourceLocation Loc) {
2154   if (auto *OMPRegionInfo =
2155           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2156     if (OMPRegionInfo->getThreadIDVariable())
2157       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2158 
2159   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2160   QualType Int32Ty =
2161       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2162   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2163   CGF.EmitStoreOfScalar(ThreadID,
2164                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2165 
2166   return ThreadIDTemp;
2167 }
2168 
2169 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2170     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2171   SmallString<256> Buffer;
2172   llvm::raw_svector_ostream Out(Buffer);
2173   Out << Name;
2174   StringRef RuntimeName = Out.str();
2175   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2176   if (Elem.second) {
2177     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2178            "OMP internal variable has different type than requested");
2179     return &*Elem.second;
2180   }
2181 
2182   return Elem.second = new llvm::GlobalVariable(
2183              CGM.getModule(), Ty, /*IsConstant*/ false,
2184              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2185              Elem.first(), /*InsertBefore=*/nullptr,
2186              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2187 }
2188 
2189 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2190   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2191   std::string Name = getName({Prefix, "var"});
2192   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2193 }
2194 
2195 namespace {
2196 /// Common pre(post)-action for different OpenMP constructs.
2197 class CommonActionTy final : public PrePostActionTy {
2198   llvm::FunctionCallee EnterCallee;
2199   ArrayRef<llvm::Value *> EnterArgs;
2200   llvm::FunctionCallee ExitCallee;
2201   ArrayRef<llvm::Value *> ExitArgs;
2202   bool Conditional;
2203   llvm::BasicBlock *ContBlock = nullptr;
2204 
2205 public:
2206   CommonActionTy(llvm::FunctionCallee EnterCallee,
2207                  ArrayRef<llvm::Value *> EnterArgs,
2208                  llvm::FunctionCallee ExitCallee,
2209                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2210       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2211         ExitArgs(ExitArgs), Conditional(Conditional) {}
2212   void Enter(CodeGenFunction &CGF) override {
2213     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2214     if (Conditional) {
2215       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2216       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2217       ContBlock = CGF.createBasicBlock("omp_if.end");
2218       // Generate the branch (If-stmt)
2219       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2220       CGF.EmitBlock(ThenBlock);
2221     }
2222   }
2223   void Done(CodeGenFunction &CGF) {
2224     // Emit the rest of blocks/branches
2225     CGF.EmitBranch(ContBlock);
2226     CGF.EmitBlock(ContBlock, true);
2227   }
2228   void Exit(CodeGenFunction &CGF) override {
2229     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2230   }
2231 };
2232 } // anonymous namespace
2233 
2234 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2235                                          StringRef CriticalName,
2236                                          const RegionCodeGenTy &CriticalOpGen,
2237                                          SourceLocation Loc, const Expr *Hint) {
2238   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2239   // CriticalOpGen();
2240   // __kmpc_end_critical(ident_t *, gtid, Lock);
2241   // Prepare arguments and build a call to __kmpc_critical
2242   if (!CGF.HaveInsertPoint())
2243     return;
2244   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2245                          getCriticalRegionLock(CriticalName)};
2246   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2247                                                 std::end(Args));
2248   if (Hint) {
2249     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2250         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2251   }
2252   CommonActionTy Action(
2253       OMPBuilder.getOrCreateRuntimeFunction(
2254           CGM.getModule(),
2255           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2256       EnterArgs,
2257       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2258                                             OMPRTL___kmpc_end_critical),
2259       Args);
2260   CriticalOpGen.setAction(Action);
2261   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2262 }
2263 
2264 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2265                                        const RegionCodeGenTy &MasterOpGen,
2266                                        SourceLocation Loc) {
2267   if (!CGF.HaveInsertPoint())
2268     return;
2269   // if(__kmpc_master(ident_t *, gtid)) {
2270   //   MasterOpGen();
2271   //   __kmpc_end_master(ident_t *, gtid);
2272   // }
2273   // Prepare arguments and build a call to __kmpc_master
2274   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2275   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2276                             CGM.getModule(), OMPRTL___kmpc_master),
2277                         Args,
2278                         OMPBuilder.getOrCreateRuntimeFunction(
2279                             CGM.getModule(), OMPRTL___kmpc_end_master),
2280                         Args,
2281                         /*Conditional=*/true);
2282   MasterOpGen.setAction(Action);
2283   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2284   Action.Done(CGF);
2285 }
2286 
2287 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2288                                        const RegionCodeGenTy &MaskedOpGen,
2289                                        SourceLocation Loc, const Expr *Filter) {
2290   if (!CGF.HaveInsertPoint())
2291     return;
2292   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2293   //   MaskedOpGen();
2294   //   __kmpc_end_masked(iden_t *, gtid);
2295   // }
2296   // Prepare arguments and build a call to __kmpc_masked
2297   llvm::Value *FilterVal = Filter
2298                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2299                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2300   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2301                          FilterVal};
2302   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2303                             getThreadID(CGF, Loc)};
2304   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2305                             CGM.getModule(), OMPRTL___kmpc_masked),
2306                         Args,
2307                         OMPBuilder.getOrCreateRuntimeFunction(
2308                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2309                         ArgsEnd,
2310                         /*Conditional=*/true);
2311   MaskedOpGen.setAction(Action);
2312   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2313   Action.Done(CGF);
2314 }
2315 
2316 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2317                                         SourceLocation Loc) {
2318   if (!CGF.HaveInsertPoint())
2319     return;
2320   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2321     OMPBuilder.createTaskyield(CGF.Builder);
2322   } else {
2323     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2324     llvm::Value *Args[] = {
2325         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2326         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2327     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2328                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2329                         Args);
2330   }
2331 
2332   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2333     Region->emitUntiedSwitch(CGF);
2334 }
2335 
2336 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2337                                           const RegionCodeGenTy &TaskgroupOpGen,
2338                                           SourceLocation Loc) {
2339   if (!CGF.HaveInsertPoint())
2340     return;
2341   // __kmpc_taskgroup(ident_t *, gtid);
2342   // TaskgroupOpGen();
2343   // __kmpc_end_taskgroup(ident_t *, gtid);
2344   // Prepare arguments and build a call to __kmpc_taskgroup
2345   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2346   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2347                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2348                         Args,
2349                         OMPBuilder.getOrCreateRuntimeFunction(
2350                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2351                         Args);
2352   TaskgroupOpGen.setAction(Action);
2353   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2354 }
2355 
2356 /// Given an array of pointers to variables, project the address of a
2357 /// given variable.
2358 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2359                                       unsigned Index, const VarDecl *Var) {
2360   // Pull out the pointer to the variable.
2361   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2362   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2363 
2364   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2365   Addr = CGF.Builder.CreateElementBitCast(
2366       Addr, CGF.ConvertTypeForMem(Var->getType()));
2367   return Addr;
2368 }
2369 
2370 static llvm::Value *emitCopyprivateCopyFunction(
2371     CodeGenModule &CGM, llvm::Type *ArgsType,
2372     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2373     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2374     SourceLocation Loc) {
2375   ASTContext &C = CGM.getContext();
2376   // void copy_func(void *LHSArg, void *RHSArg);
2377   FunctionArgList Args;
2378   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2379                            ImplicitParamDecl::Other);
2380   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2381                            ImplicitParamDecl::Other);
2382   Args.push_back(&LHSArg);
2383   Args.push_back(&RHSArg);
2384   const auto &CGFI =
2385       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2386   std::string Name =
2387       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2388   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2389                                     llvm::GlobalValue::InternalLinkage, Name,
2390                                     &CGM.getModule());
2391   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2392   Fn->setDoesNotRecurse();
2393   CodeGenFunction CGF(CGM);
2394   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2395   // Dest = (void*[n])(LHSArg);
2396   // Src = (void*[n])(RHSArg);
2397   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2398       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2399       ArgsType), CGF.getPointerAlign());
2400   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2401       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2402       ArgsType), CGF.getPointerAlign());
2403   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2404   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2405   // ...
2406   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2407   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2408     const auto *DestVar =
2409         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2410     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2411 
2412     const auto *SrcVar =
2413         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2414     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2415 
2416     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2417     QualType Type = VD->getType();
2418     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2419   }
2420   CGF.FinishFunction();
2421   return Fn;
2422 }
2423 
2424 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2425                                        const RegionCodeGenTy &SingleOpGen,
2426                                        SourceLocation Loc,
2427                                        ArrayRef<const Expr *> CopyprivateVars,
2428                                        ArrayRef<const Expr *> SrcExprs,
2429                                        ArrayRef<const Expr *> DstExprs,
2430                                        ArrayRef<const Expr *> AssignmentOps) {
2431   if (!CGF.HaveInsertPoint())
2432     return;
2433   assert(CopyprivateVars.size() == SrcExprs.size() &&
2434          CopyprivateVars.size() == DstExprs.size() &&
2435          CopyprivateVars.size() == AssignmentOps.size());
2436   ASTContext &C = CGM.getContext();
2437   // int32 did_it = 0;
2438   // if(__kmpc_single(ident_t *, gtid)) {
2439   //   SingleOpGen();
2440   //   __kmpc_end_single(ident_t *, gtid);
2441   //   did_it = 1;
2442   // }
2443   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2444   // <copy_func>, did_it);
2445 
2446   Address DidIt = Address::invalid();
2447   if (!CopyprivateVars.empty()) {
2448     // int32 did_it = 0;
2449     QualType KmpInt32Ty =
2450         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2451     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2452     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2453   }
2454   // Prepare arguments and build a call to __kmpc_single
2455   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2456   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2457                             CGM.getModule(), OMPRTL___kmpc_single),
2458                         Args,
2459                         OMPBuilder.getOrCreateRuntimeFunction(
2460                             CGM.getModule(), OMPRTL___kmpc_end_single),
2461                         Args,
2462                         /*Conditional=*/true);
2463   SingleOpGen.setAction(Action);
2464   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2465   if (DidIt.isValid()) {
2466     // did_it = 1;
2467     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2468   }
2469   Action.Done(CGF);
2470   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2471   // <copy_func>, did_it);
2472   if (DidIt.isValid()) {
2473     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2474     QualType CopyprivateArrayTy = C.getConstantArrayType(
2475         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2476         /*IndexTypeQuals=*/0);
2477     // Create a list of all private variables for copyprivate.
2478     Address CopyprivateList =
2479         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2480     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2481       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2482       CGF.Builder.CreateStore(
2483           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2484               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2485               CGF.VoidPtrTy),
2486           Elem);
2487     }
2488     // Build function that copies private values from single region to all other
2489     // threads in the corresponding parallel region.
2490     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2491         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2492         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2493     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2494     Address CL =
2495       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2496                                                       CGF.VoidPtrTy);
2497     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2498     llvm::Value *Args[] = {
2499         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2500         getThreadID(CGF, Loc),        // i32 <gtid>
2501         BufSize,                      // size_t <buf_size>
2502         CL.getPointer(),              // void *<copyprivate list>
2503         CpyFn,                        // void (*) (void *, void *) <copy_func>
2504         DidItVal                      // i32 did_it
2505     };
2506     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2507                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2508                         Args);
2509   }
2510 }
2511 
2512 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2513                                         const RegionCodeGenTy &OrderedOpGen,
2514                                         SourceLocation Loc, bool IsThreads) {
2515   if (!CGF.HaveInsertPoint())
2516     return;
2517   // __kmpc_ordered(ident_t *, gtid);
2518   // OrderedOpGen();
2519   // __kmpc_end_ordered(ident_t *, gtid);
2520   // Prepare arguments and build a call to __kmpc_ordered
2521   if (IsThreads) {
2522     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2523     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2524                               CGM.getModule(), OMPRTL___kmpc_ordered),
2525                           Args,
2526                           OMPBuilder.getOrCreateRuntimeFunction(
2527                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2528                           Args);
2529     OrderedOpGen.setAction(Action);
2530     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2531     return;
2532   }
2533   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2534 }
2535 
2536 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2537   unsigned Flags;
2538   if (Kind == OMPD_for)
2539     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2540   else if (Kind == OMPD_sections)
2541     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2542   else if (Kind == OMPD_single)
2543     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2544   else if (Kind == OMPD_barrier)
2545     Flags = OMP_IDENT_BARRIER_EXPL;
2546   else
2547     Flags = OMP_IDENT_BARRIER_IMPL;
2548   return Flags;
2549 }
2550 
2551 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2552     CodeGenFunction &CGF, const OMPLoopDirective &S,
2553     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2554   // Check if the loop directive is actually a doacross loop directive. In this
2555   // case choose static, 1 schedule.
2556   if (llvm::any_of(
2557           S.getClausesOfKind<OMPOrderedClause>(),
2558           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2559     ScheduleKind = OMPC_SCHEDULE_static;
2560     // Chunk size is 1 in this case.
2561     llvm::APInt ChunkSize(32, 1);
2562     ChunkExpr = IntegerLiteral::Create(
2563         CGF.getContext(), ChunkSize,
2564         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2565         SourceLocation());
2566   }
2567 }
2568 
2569 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2570                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2571                                       bool ForceSimpleCall) {
2572   // Check if we should use the OMPBuilder
2573   auto *OMPRegionInfo =
2574       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2575   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2576     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2577         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2578     return;
2579   }
2580 
2581   if (!CGF.HaveInsertPoint())
2582     return;
2583   // Build call __kmpc_cancel_barrier(loc, thread_id);
2584   // Build call __kmpc_barrier(loc, thread_id);
2585   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2586   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2587   // thread_id);
2588   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2589                          getThreadID(CGF, Loc)};
2590   if (OMPRegionInfo) {
2591     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2592       llvm::Value *Result = CGF.EmitRuntimeCall(
2593           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2594                                                 OMPRTL___kmpc_cancel_barrier),
2595           Args);
2596       if (EmitChecks) {
2597         // if (__kmpc_cancel_barrier()) {
2598         //   exit from construct;
2599         // }
2600         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2601         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2602         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2603         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2604         CGF.EmitBlock(ExitBB);
2605         //   exit from construct;
2606         CodeGenFunction::JumpDest CancelDestination =
2607             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2608         CGF.EmitBranchThroughCleanup(CancelDestination);
2609         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2610       }
2611       return;
2612     }
2613   }
2614   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2615                           CGM.getModule(), OMPRTL___kmpc_barrier),
2616                       Args);
2617 }
2618 
2619 /// Map the OpenMP loop schedule to the runtime enumeration.
2620 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2621                                           bool Chunked, bool Ordered) {
2622   switch (ScheduleKind) {
2623   case OMPC_SCHEDULE_static:
2624     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2625                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2626   case OMPC_SCHEDULE_dynamic:
2627     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2628   case OMPC_SCHEDULE_guided:
2629     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2630   case OMPC_SCHEDULE_runtime:
2631     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2632   case OMPC_SCHEDULE_auto:
2633     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2634   case OMPC_SCHEDULE_unknown:
2635     assert(!Chunked && "chunk was specified but schedule kind not known");
2636     return Ordered ? OMP_ord_static : OMP_sch_static;
2637   }
2638   llvm_unreachable("Unexpected runtime schedule");
2639 }
2640 
2641 /// Map the OpenMP distribute schedule to the runtime enumeration.
2642 static OpenMPSchedType
2643 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2644   // only static is allowed for dist_schedule
2645   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2646 }
2647 
2648 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2649                                          bool Chunked) const {
2650   OpenMPSchedType Schedule =
2651       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2652   return Schedule == OMP_sch_static;
2653 }
2654 
2655 bool CGOpenMPRuntime::isStaticNonchunked(
2656     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2657   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2658   return Schedule == OMP_dist_sch_static;
2659 }
2660 
2661 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2662                                       bool Chunked) const {
2663   OpenMPSchedType Schedule =
2664       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2665   return Schedule == OMP_sch_static_chunked;
2666 }
2667 
2668 bool CGOpenMPRuntime::isStaticChunked(
2669     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2670   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2671   return Schedule == OMP_dist_sch_static_chunked;
2672 }
2673 
2674 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2675   OpenMPSchedType Schedule =
2676       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2677   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2678   return Schedule != OMP_sch_static;
2679 }
2680 
2681 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2682                                   OpenMPScheduleClauseModifier M1,
2683                                   OpenMPScheduleClauseModifier M2) {
2684   int Modifier = 0;
2685   switch (M1) {
2686   case OMPC_SCHEDULE_MODIFIER_monotonic:
2687     Modifier = OMP_sch_modifier_monotonic;
2688     break;
2689   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2690     Modifier = OMP_sch_modifier_nonmonotonic;
2691     break;
2692   case OMPC_SCHEDULE_MODIFIER_simd:
2693     if (Schedule == OMP_sch_static_chunked)
2694       Schedule = OMP_sch_static_balanced_chunked;
2695     break;
2696   case OMPC_SCHEDULE_MODIFIER_last:
2697   case OMPC_SCHEDULE_MODIFIER_unknown:
2698     break;
2699   }
2700   switch (M2) {
2701   case OMPC_SCHEDULE_MODIFIER_monotonic:
2702     Modifier = OMP_sch_modifier_monotonic;
2703     break;
2704   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2705     Modifier = OMP_sch_modifier_nonmonotonic;
2706     break;
2707   case OMPC_SCHEDULE_MODIFIER_simd:
2708     if (Schedule == OMP_sch_static_chunked)
2709       Schedule = OMP_sch_static_balanced_chunked;
2710     break;
2711   case OMPC_SCHEDULE_MODIFIER_last:
2712   case OMPC_SCHEDULE_MODIFIER_unknown:
2713     break;
2714   }
2715   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2716   // If the static schedule kind is specified or if the ordered clause is
2717   // specified, and if the nonmonotonic modifier is not specified, the effect is
2718   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2719   // modifier is specified, the effect is as if the nonmonotonic modifier is
2720   // specified.
2721   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2722     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2723           Schedule == OMP_sch_static_balanced_chunked ||
2724           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2725           Schedule == OMP_dist_sch_static_chunked ||
2726           Schedule == OMP_dist_sch_static))
2727       Modifier = OMP_sch_modifier_nonmonotonic;
2728   }
2729   return Schedule | Modifier;
2730 }
2731 
2732 void CGOpenMPRuntime::emitForDispatchInit(
2733     CodeGenFunction &CGF, SourceLocation Loc,
2734     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2735     bool Ordered, const DispatchRTInput &DispatchValues) {
2736   if (!CGF.HaveInsertPoint())
2737     return;
2738   OpenMPSchedType Schedule = getRuntimeSchedule(
2739       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2740   assert(Ordered ||
2741          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2742           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2743           Schedule != OMP_sch_static_balanced_chunked));
2744   // Call __kmpc_dispatch_init(
2745   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2746   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2747   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2748 
2749   // If the Chunk was not specified in the clause - use default value 1.
2750   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2751                                             : CGF.Builder.getIntN(IVSize, 1);
2752   llvm::Value *Args[] = {
2753       emitUpdateLocation(CGF, Loc),
2754       getThreadID(CGF, Loc),
2755       CGF.Builder.getInt32(addMonoNonMonoModifier(
2756           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2757       DispatchValues.LB,                                     // Lower
2758       DispatchValues.UB,                                     // Upper
2759       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2760       Chunk                                                  // Chunk
2761   };
2762   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2763 }
2764 
2765 static void emitForStaticInitCall(
2766     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2767     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2768     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2769     const CGOpenMPRuntime::StaticRTInput &Values) {
2770   if (!CGF.HaveInsertPoint())
2771     return;
2772 
2773   assert(!Values.Ordered);
2774   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2775          Schedule == OMP_sch_static_balanced_chunked ||
2776          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2777          Schedule == OMP_dist_sch_static ||
2778          Schedule == OMP_dist_sch_static_chunked);
2779 
2780   // Call __kmpc_for_static_init(
2781   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2782   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2783   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2784   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2785   llvm::Value *Chunk = Values.Chunk;
2786   if (Chunk == nullptr) {
2787     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2788             Schedule == OMP_dist_sch_static) &&
2789            "expected static non-chunked schedule");
2790     // If the Chunk was not specified in the clause - use default value 1.
2791     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2792   } else {
2793     assert((Schedule == OMP_sch_static_chunked ||
2794             Schedule == OMP_sch_static_balanced_chunked ||
2795             Schedule == OMP_ord_static_chunked ||
2796             Schedule == OMP_dist_sch_static_chunked) &&
2797            "expected static chunked schedule");
2798   }
2799   llvm::Value *Args[] = {
2800       UpdateLocation,
2801       ThreadId,
2802       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2803                                                   M2)), // Schedule type
2804       Values.IL.getPointer(),                           // &isLastIter
2805       Values.LB.getPointer(),                           // &LB
2806       Values.UB.getPointer(),                           // &UB
2807       Values.ST.getPointer(),                           // &Stride
2808       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2809       Chunk                                             // Chunk
2810   };
2811   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2812 }
2813 
2814 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2815                                         SourceLocation Loc,
2816                                         OpenMPDirectiveKind DKind,
2817                                         const OpenMPScheduleTy &ScheduleKind,
2818                                         const StaticRTInput &Values) {
2819   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2820       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2821   assert(isOpenMPWorksharingDirective(DKind) &&
2822          "Expected loop-based or sections-based directive.");
2823   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2824                                              isOpenMPLoopDirective(DKind)
2825                                                  ? OMP_IDENT_WORK_LOOP
2826                                                  : OMP_IDENT_WORK_SECTIONS);
2827   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2828   llvm::FunctionCallee StaticInitFunction =
2829       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2830   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2831   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2832                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2833 }
2834 
2835 void CGOpenMPRuntime::emitDistributeStaticInit(
2836     CodeGenFunction &CGF, SourceLocation Loc,
2837     OpenMPDistScheduleClauseKind SchedKind,
2838     const CGOpenMPRuntime::StaticRTInput &Values) {
2839   OpenMPSchedType ScheduleNum =
2840       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2841   llvm::Value *UpdatedLocation =
2842       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2843   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2844   llvm::FunctionCallee StaticInitFunction =
2845       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2846   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2847                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2848                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2849 }
2850 
2851 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2852                                           SourceLocation Loc,
2853                                           OpenMPDirectiveKind DKind) {
2854   if (!CGF.HaveInsertPoint())
2855     return;
2856   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2857   llvm::Value *Args[] = {
2858       emitUpdateLocation(CGF, Loc,
2859                          isOpenMPDistributeDirective(DKind)
2860                              ? OMP_IDENT_WORK_DISTRIBUTE
2861                              : isOpenMPLoopDirective(DKind)
2862                                    ? OMP_IDENT_WORK_LOOP
2863                                    : OMP_IDENT_WORK_SECTIONS),
2864       getThreadID(CGF, Loc)};
2865   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2866   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2867                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2868                       Args);
2869 }
2870 
2871 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2872                                                  SourceLocation Loc,
2873                                                  unsigned IVSize,
2874                                                  bool IVSigned) {
2875   if (!CGF.HaveInsertPoint())
2876     return;
2877   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2878   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2879   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2880 }
2881 
2882 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2883                                           SourceLocation Loc, unsigned IVSize,
2884                                           bool IVSigned, Address IL,
2885                                           Address LB, Address UB,
2886                                           Address ST) {
2887   // Call __kmpc_dispatch_next(
2888   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2889   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2890   //          kmp_int[32|64] *p_stride);
2891   llvm::Value *Args[] = {
2892       emitUpdateLocation(CGF, Loc),
2893       getThreadID(CGF, Loc),
2894       IL.getPointer(), // &isLastIter
2895       LB.getPointer(), // &Lower
2896       UB.getPointer(), // &Upper
2897       ST.getPointer()  // &Stride
2898   };
2899   llvm::Value *Call =
2900       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2901   return CGF.EmitScalarConversion(
2902       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2903       CGF.getContext().BoolTy, Loc);
2904 }
2905 
2906 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2907                                            llvm::Value *NumThreads,
2908                                            SourceLocation Loc) {
2909   if (!CGF.HaveInsertPoint())
2910     return;
2911   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2912   llvm::Value *Args[] = {
2913       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2914       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2915   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2916                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2917                       Args);
2918 }
2919 
2920 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2921                                          ProcBindKind ProcBind,
2922                                          SourceLocation Loc) {
2923   if (!CGF.HaveInsertPoint())
2924     return;
2925   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2926   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2927   llvm::Value *Args[] = {
2928       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2929       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2930   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2931                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2932                       Args);
2933 }
2934 
2935 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2936                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2937   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2938     OMPBuilder.createFlush(CGF.Builder);
2939   } else {
2940     if (!CGF.HaveInsertPoint())
2941       return;
2942     // Build call void __kmpc_flush(ident_t *loc)
2943     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2944                             CGM.getModule(), OMPRTL___kmpc_flush),
2945                         emitUpdateLocation(CGF, Loc));
2946   }
2947 }
2948 
namespace {
/// Indexes of fields for type kmp_task_t. The values are spelled out so the
/// position of each field can be read off directly.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds = 0,
  /// Task routine.
  KmpTaskTRoutine = 1,
  /// Partition id for the untied tasks.
  KmpTaskTPartId = 2,
  /// Function with call of destructors for private variables.
  Data1 = 3,
  /// Task priority.
  Data2 = 4,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound = 5,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound = 6,
  /// (Taskloops only) Stride.
  KmpTaskTStride = 7,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter = 8,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions = 9,
};
} // anonymous namespace
2974 
2975 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2976   return OffloadEntriesTargetRegion.empty() &&
2977          OffloadEntriesDeviceGlobalVar.empty();
2978 }
2979 
2980 /// Initialize target region entry.
2981 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2982     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2983                                     StringRef ParentName, unsigned LineNum,
2984                                     unsigned Order) {
2985   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2986                                              "only required for the device "
2987                                              "code generation.");
2988   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2989       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2990                                    OMPTargetRegionEntryTargetRegion);
2991   ++OffloadingEntriesNum;
2992 }
2993 
2994 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2995     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2996                                   StringRef ParentName, unsigned LineNum,
2997                                   llvm::Constant *Addr, llvm::Constant *ID,
2998                                   OMPTargetRegionEntryKind Flags) {
2999   // If we are emitting code for a target, the entry is already initialized,
3000   // only has to be registered.
3001   if (CGM.getLangOpts().OpenMPIsDevice) {
3002     // This could happen if the device compilation is invoked standalone.
3003     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
3004       return;
3005     auto &Entry =
3006         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3007     Entry.setAddress(Addr);
3008     Entry.setID(ID);
3009     Entry.setFlags(Flags);
3010   } else {
3011     if (Flags ==
3012             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3013         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3014                                  /*IgnoreAddressId*/ true))
3015       return;
3016     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3017            "Target region entry already registered!");
3018     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3019     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3020     ++OffloadingEntriesNum;
3021   }
3022 }
3023 
3024 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3025     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3026     bool IgnoreAddressId) const {
3027   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3028   if (PerDevice == OffloadEntriesTargetRegion.end())
3029     return false;
3030   auto PerFile = PerDevice->second.find(FileID);
3031   if (PerFile == PerDevice->second.end())
3032     return false;
3033   auto PerParentName = PerFile->second.find(ParentName);
3034   if (PerParentName == PerFile->second.end())
3035     return false;
3036   auto PerLine = PerParentName->second.find(LineNum);
3037   if (PerLine == PerParentName->second.end())
3038     return false;
3039   // Fail if this entry is already registered.
3040   if (!IgnoreAddressId &&
3041       (PerLine->second.getAddress() || PerLine->second.getID()))
3042     return false;
3043   return true;
3044 }
3045 
3046 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3047     const OffloadTargetRegionEntryInfoActTy &Action) {
3048   // Scan all target region entries and perform the provided action.
3049   for (const auto &D : OffloadEntriesTargetRegion)
3050     for (const auto &F : D.second)
3051       for (const auto &P : F.second)
3052         for (const auto &L : P.second)
3053           Action(D.first, F.first, P.first(), L.first, L.second);
3054 }
3055 
3056 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3057     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3058                                        OMPTargetGlobalVarEntryKind Flags,
3059                                        unsigned Order) {
3060   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3061                                              "only required for the device "
3062                                              "code generation.");
3063   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3064   ++OffloadingEntriesNum;
3065 }
3066 
3067 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3068     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3069                                      CharUnits VarSize,
3070                                      OMPTargetGlobalVarEntryKind Flags,
3071                                      llvm::GlobalValue::LinkageTypes Linkage) {
3072   if (CGM.getLangOpts().OpenMPIsDevice) {
3073     // This could happen if the device compilation is invoked standalone.
3074     if (!hasDeviceGlobalVarEntryInfo(VarName))
3075       return;
3076     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3077     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3078       if (Entry.getVarSize().isZero()) {
3079         Entry.setVarSize(VarSize);
3080         Entry.setLinkage(Linkage);
3081       }
3082       return;
3083     }
3084     Entry.setVarSize(VarSize);
3085     Entry.setLinkage(Linkage);
3086     Entry.setAddress(Addr);
3087   } else {
3088     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3089       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3090       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3091              "Entry not initialized!");
3092       if (Entry.getVarSize().isZero()) {
3093         Entry.setVarSize(VarSize);
3094         Entry.setLinkage(Linkage);
3095       }
3096       return;
3097     }
3098     OffloadEntriesDeviceGlobalVar.try_emplace(
3099         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3100     ++OffloadingEntriesNum;
3101   }
3102 }
3103 
3104 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3105     actOnDeviceGlobalVarEntriesInfo(
3106         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3107   // Scan all target region entries and perform the provided action.
3108   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3109     Action(E.getKey(), E.getValue());
3110 }
3111 
3112 void CGOpenMPRuntime::createOffloadEntry(
3113     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3114     llvm::GlobalValue::LinkageTypes Linkage) {
3115   StringRef Name = Addr->getName();
3116   llvm::Module &M = CGM.getModule();
3117   llvm::LLVMContext &C = M.getContext();
3118 
3119   // Create constant string with the name.
3120   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3121 
3122   std::string StringName = getName({"omp_offloading", "entry_name"});
3123   auto *Str = new llvm::GlobalVariable(
3124       M, StrPtrInit->getType(), /*isConstant=*/true,
3125       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3126   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3127 
3128   llvm::Constant *Data[] = {
3129       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3130       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3131       llvm::ConstantInt::get(CGM.SizeTy, Size),
3132       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3133       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3134   std::string EntryName = getName({"omp_offloading", "entry", ""});
3135   llvm::GlobalVariable *Entry = createGlobalStruct(
3136       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3137       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3138 
3139   // The entry has to be created in the section the linker expects it to be.
3140   Entry->setSection("omp_offloading_entries");
3141 }
3142 
3143 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3144   // Emit the offloading entries and metadata so that the device codegen side
3145   // can easily figure out what to emit. The produced metadata looks like
3146   // this:
3147   //
3148   // !omp_offload.info = !{!1, ...}
3149   //
3150   // Right now we only generate metadata for function that contain target
3151   // regions.
3152 
3153   // If we are in simd mode or there are no entries, we don't need to do
3154   // anything.
3155   if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3156     return;
3157 
3158   llvm::Module &M = CGM.getModule();
3159   llvm::LLVMContext &C = M.getContext();
3160   SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3161                          SourceLocation, StringRef>,
3162               16>
3163       OrderedEntries(OffloadEntriesInfoManager.size());
3164   llvm::SmallVector<StringRef, 16> ParentFunctions(
3165       OffloadEntriesInfoManager.size());
3166 
3167   // Auxiliary methods to create metadata values and strings.
3168   auto &&GetMDInt = [this](unsigned V) {
3169     return llvm::ConstantAsMetadata::get(
3170         llvm::ConstantInt::get(CGM.Int32Ty, V));
3171   };
3172 
3173   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3174 
3175   // Create the offloading info metadata node.
3176   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3177 
3178   // Create function that emits metadata for each target region entry;
3179   auto &&TargetRegionMetadataEmitter =
3180       [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3181        &GetMDString](
3182           unsigned DeviceID, unsigned FileID, StringRef ParentName,
3183           unsigned Line,
3184           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3185         // Generate metadata for target regions. Each entry of this metadata
3186         // contains:
3187         // - Entry 0 -> Kind of this type of metadata (0).
3188         // - Entry 1 -> Device ID of the file where the entry was identified.
3189         // - Entry 2 -> File ID of the file where the entry was identified.
3190         // - Entry 3 -> Mangled name of the function where the entry was
3191         // identified.
3192         // - Entry 4 -> Line in the file where the entry was identified.
3193         // - Entry 5 -> Order the entry was created.
3194         // The first element of the metadata node is the kind.
3195         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3196                                  GetMDInt(FileID),      GetMDString(ParentName),
3197                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
3198 
3199         SourceLocation Loc;
3200         for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3201                   E = CGM.getContext().getSourceManager().fileinfo_end();
3202              I != E; ++I) {
3203           if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3204               I->getFirst()->getUniqueID().getFile() == FileID) {
3205             Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3206                 I->getFirst(), Line, 1);
3207             break;
3208           }
3209         }
3210         // Save this entry in the right position of the ordered entries array.
3211         OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3212         ParentFunctions[E.getOrder()] = ParentName;
3213 
3214         // Add metadata to the named metadata node.
3215         MD->addOperand(llvm::MDNode::get(C, Ops));
3216       };
3217 
3218   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3219       TargetRegionMetadataEmitter);
3220 
3221   // Create function that emits metadata for each device global variable entry;
3222   auto &&DeviceGlobalVarMetadataEmitter =
3223       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3224        MD](StringRef MangledName,
3225            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3226                &E) {
3227         // Generate metadata for global variables. Each entry of this metadata
3228         // contains:
3229         // - Entry 0 -> Kind of this type of metadata (1).
3230         // - Entry 1 -> Mangled name of the variable.
3231         // - Entry 2 -> Declare target kind.
3232         // - Entry 3 -> Order the entry was created.
3233         // The first element of the metadata node is the kind.
3234         llvm::Metadata *Ops[] = {
3235             GetMDInt(E.getKind()), GetMDString(MangledName),
3236             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3237 
3238         // Save this entry in the right position of the ordered entries array.
3239         OrderedEntries[E.getOrder()] =
3240             std::make_tuple(&E, SourceLocation(), MangledName);
3241 
3242         // Add metadata to the named metadata node.
3243         MD->addOperand(llvm::MDNode::get(C, Ops));
3244       };
3245 
3246   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3247       DeviceGlobalVarMetadataEmitter);
3248 
3249   for (const auto &E : OrderedEntries) {
3250     assert(std::get<0>(E) && "All ordered entries must exist!");
3251     if (const auto *CE =
3252             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3253                 std::get<0>(E))) {
3254       if (!CE->getID() || !CE->getAddress()) {
3255         // Do not blame the entry if the parent funtion is not emitted.
3256         StringRef FnName = ParentFunctions[CE->getOrder()];
3257         if (!CGM.GetGlobalValue(FnName))
3258           continue;
3259         unsigned DiagID = CGM.getDiags().getCustomDiagID(
3260             DiagnosticsEngine::Error,
3261             "Offloading entry for target region in %0 is incorrect: either the "
3262             "address or the ID is invalid.");
3263         CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3264         continue;
3265       }
3266       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3267                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3268     } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3269                                              OffloadEntryInfoDeviceGlobalVar>(
3270                    std::get<0>(E))) {
3271       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3272           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3273               CE->getFlags());
3274       switch (Flags) {
3275       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3276         if (CGM.getLangOpts().OpenMPIsDevice &&
3277             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3278           continue;
3279         if (!CE->getAddress()) {
3280           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3281               DiagnosticsEngine::Error, "Offloading entry for declare target "
3282                                         "variable %0 is incorrect: the "
3283                                         "address is invalid.");
3284           CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3285           continue;
3286         }
3287         // The vaiable has no definition - no need to add the entry.
3288         if (CE->getVarSize().isZero())
3289           continue;
3290         break;
3291       }
3292       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3293         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3294                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3295                "Declaret target link address is set.");
3296         if (CGM.getLangOpts().OpenMPIsDevice)
3297           continue;
3298         if (!CE->getAddress()) {
3299           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3300               DiagnosticsEngine::Error,
3301               "Offloading entry for declare target variable is incorrect: the "
3302               "address is invalid.");
3303           CGM.getDiags().Report(DiagID);
3304           continue;
3305         }
3306         break;
3307       }
3308       createOffloadEntry(CE->getAddress(), CE->getAddress(),
3309                          CE->getVarSize().getQuantity(), Flags,
3310                          CE->getLinkage());
3311     } else {
3312       llvm_unreachable("Unsupported entry kind.");
3313     }
3314   }
3315 }
3316 
3317 /// Loads all the offload entries information from the host IR
3318 /// metadata.
3319 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3320   // If we are in target mode, load the metadata from the host IR. This code has
3321   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3322 
3323   if (!CGM.getLangOpts().OpenMPIsDevice)
3324     return;
3325 
3326   if (CGM.getLangOpts().OMPHostIRFile.empty())
3327     return;
3328 
3329   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3330   if (auto EC = Buf.getError()) {
3331     CGM.getDiags().Report(diag::err_cannot_open_file)
3332         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3333     return;
3334   }
3335 
3336   llvm::LLVMContext C;
3337   auto ME = expectedToErrorOrAndEmitErrors(
3338       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3339 
3340   if (auto EC = ME.getError()) {
3341     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3342         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3343     CGM.getDiags().Report(DiagID)
3344         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3345     return;
3346   }
3347 
3348   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3349   if (!MD)
3350     return;
3351 
3352   for (llvm::MDNode *MN : MD->operands()) {
3353     auto &&GetMDInt = [MN](unsigned Idx) {
3354       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3355       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3356     };
3357 
3358     auto &&GetMDString = [MN](unsigned Idx) {
3359       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3360       return V->getString();
3361     };
3362 
3363     switch (GetMDInt(0)) {
3364     default:
3365       llvm_unreachable("Unexpected metadata!");
3366       break;
3367     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3368         OffloadingEntryInfoTargetRegion:
3369       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3370           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3371           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3372           /*Order=*/GetMDInt(5));
3373       break;
3374     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3375         OffloadingEntryInfoDeviceGlobalVar:
3376       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3377           /*MangledName=*/GetMDString(1),
3378           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3379               /*Flags=*/GetMDInt(2)),
3380           /*Order=*/GetMDInt(3));
3381       break;
3382     }
3383   }
3384 }
3385 
3386 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3387   if (!KmpRoutineEntryPtrTy) {
3388     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3389     ASTContext &C = CGM.getContext();
3390     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3391     FunctionProtoType::ExtProtoInfo EPI;
3392     KmpRoutineEntryPtrQTy = C.getPointerType(
3393         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3394     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3395   }
3396 }
3397 
3398 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3399   // Make sure the type of the entry is already created. This is the type we
3400   // have to create:
3401   // struct __tgt_offload_entry{
3402   //   void      *addr;       // Pointer to the offload entry info.
3403   //                          // (function or global)
3404   //   char      *name;       // Name of the function or global.
3405   //   size_t     size;       // Size of the entry info (0 if it a function).
3406   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3407   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3408   // };
3409   if (TgtOffloadEntryQTy.isNull()) {
3410     ASTContext &C = CGM.getContext();
3411     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3412     RD->startDefinition();
3413     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3414     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3415     addFieldToRecordDecl(C, RD, C.getSizeType());
3416     addFieldToRecordDecl(
3417         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3418     addFieldToRecordDecl(
3419         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3420     RD->completeDefinition();
3421     RD->addAttr(PackedAttr::CreateImplicit(C));
3422     TgtOffloadEntryQTy = C.getRecordType(RD);
3423   }
3424   return TgtOffloadEntryQTy;
3425 }
3426 
3427 namespace {
3428 struct PrivateHelpersTy {
3429   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3430                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3431       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3432         PrivateElemInit(PrivateElemInit) {}
3433   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3434   const Expr *OriginalRef = nullptr;
3435   const VarDecl *Original = nullptr;
3436   const VarDecl *PrivateCopy = nullptr;
3437   const VarDecl *PrivateElemInit = nullptr;
3438   bool isLocalPrivate() const {
3439     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3440   }
3441 };
3442 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3443 } // anonymous namespace
3444 
3445 static bool isAllocatableDecl(const VarDecl *VD) {
3446   const VarDecl *CVD = VD->getCanonicalDecl();
3447   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3448     return false;
3449   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3450   // Use the default allocation.
3451   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3452             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3453            !AA->getAllocator());
3454 }
3455 
3456 static RecordDecl *
3457 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3458   if (!Privates.empty()) {
3459     ASTContext &C = CGM.getContext();
3460     // Build struct .kmp_privates_t. {
3461     //         /*  private vars  */
3462     //       };
3463     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3464     RD->startDefinition();
3465     for (const auto &Pair : Privates) {
3466       const VarDecl *VD = Pair.second.Original;
3467       QualType Type = VD->getType().getNonReferenceType();
3468       // If the private variable is a local variable with lvalue ref type,
3469       // allocate the pointer instead of the pointee type.
3470       if (Pair.second.isLocalPrivate()) {
3471         if (VD->getType()->isLValueReferenceType())
3472           Type = C.getPointerType(Type);
3473         if (isAllocatableDecl(VD))
3474           Type = C.getPointerType(Type);
3475       }
3476       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3477       if (VD->hasAttrs()) {
3478         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3479              E(VD->getAttrs().end());
3480              I != E; ++I)
3481           FD->addAttr(*I);
3482       }
3483     }
3484     RD->completeDefinition();
3485     return RD;
3486   }
3487   return nullptr;
3488 }
3489 
3490 static RecordDecl *
3491 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3492                          QualType KmpInt32Ty,
3493                          QualType KmpRoutineEntryPointerQTy) {
3494   ASTContext &C = CGM.getContext();
3495   // Build struct kmp_task_t {
3496   //         void *              shareds;
3497   //         kmp_routine_entry_t routine;
3498   //         kmp_int32           part_id;
3499   //         kmp_cmplrdata_t data1;
3500   //         kmp_cmplrdata_t data2;
3501   // For taskloops additional fields:
3502   //         kmp_uint64          lb;
3503   //         kmp_uint64          ub;
3504   //         kmp_int64           st;
3505   //         kmp_int32           liter;
3506   //         void *              reductions;
3507   //       };
3508   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3509   UD->startDefinition();
3510   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3511   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3512   UD->completeDefinition();
3513   QualType KmpCmplrdataTy = C.getRecordType(UD);
3514   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3515   RD->startDefinition();
3516   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3517   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3518   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3519   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3520   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3521   if (isOpenMPTaskLoopDirective(Kind)) {
3522     QualType KmpUInt64Ty =
3523         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3524     QualType KmpInt64Ty =
3525         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3526     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3527     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3528     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3529     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3530     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3531   }
3532   RD->completeDefinition();
3533   return RD;
3534 }
3535 
3536 static RecordDecl *
3537 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3538                                      ArrayRef<PrivateDataTy> Privates) {
3539   ASTContext &C = CGM.getContext();
3540   // Build struct kmp_task_t_with_privates {
3541   //         kmp_task_t task_data;
3542   //         .kmp_privates_t. privates;
3543   //       };
3544   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3545   RD->startDefinition();
3546   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3547   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3548     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3549   RD->completeDefinition();
3550   return RD;
3551 }
3552 
3553 /// Emit a proxy function which accepts kmp_task_t as the second
3554 /// argument.
3555 /// \code
3556 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3557 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3558 ///   For taskloops:
3559 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3560 ///   tt->reductions, tt->shareds);
3561 ///   return 0;
3562 /// }
3563 /// \endcode
3564 static llvm::Function *
3565 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3566                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3567                       QualType KmpTaskTWithPrivatesPtrQTy,
3568                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3569                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3570                       llvm::Value *TaskPrivatesMap) {
3571   ASTContext &C = CGM.getContext();
3572   FunctionArgList Args;
3573   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3574                             ImplicitParamDecl::Other);
3575   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3576                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3577                                 ImplicitParamDecl::Other);
3578   Args.push_back(&GtidArg);
3579   Args.push_back(&TaskTypeArg);
3580   const auto &TaskEntryFnInfo =
3581       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3582   llvm::FunctionType *TaskEntryTy =
3583       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3584   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3585   auto *TaskEntry = llvm::Function::Create(
3586       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3587   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3588   TaskEntry->setDoesNotRecurse();
3589   CodeGenFunction CGF(CGM);
3590   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3591                     Loc, Loc);
3592 
3593   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3594   // tt,
3595   // For taskloops:
3596   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3597   // tt->task_data.shareds);
3598   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3599       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3600   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3601       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3602       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3603   const auto *KmpTaskTWithPrivatesQTyRD =
3604       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3605   LValue Base =
3606       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3607   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3608   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3609   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3610   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3611 
3612   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3613   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3614   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3615       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3616       CGF.ConvertTypeForMem(SharedsPtrTy));
3617 
3618   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3619   llvm::Value *PrivatesParam;
3620   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3621     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3622     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3623         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3624   } else {
3625     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3626   }
3627 
3628   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3629                                TaskPrivatesMap,
3630                                CGF.Builder
3631                                    .CreatePointerBitCastOrAddrSpaceCast(
3632                                        TDBase.getAddress(CGF), CGF.VoidPtrTy)
3633                                    .getPointer()};
3634   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3635                                           std::end(CommonArgs));
3636   if (isOpenMPTaskLoopDirective(Kind)) {
3637     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3638     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3639     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3640     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3641     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3642     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3643     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3644     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3645     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3646     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3647     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3648     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3649     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3650     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3651     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3652     CallArgs.push_back(LBParam);
3653     CallArgs.push_back(UBParam);
3654     CallArgs.push_back(StParam);
3655     CallArgs.push_back(LIParam);
3656     CallArgs.push_back(RParam);
3657   }
3658   CallArgs.push_back(SharedsParam);
3659 
3660   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3661                                                   CallArgs);
3662   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3663                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3664   CGF.FinishFunction();
3665   return TaskEntry;
3666 }
3667 
3668 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3669                                             SourceLocation Loc,
3670                                             QualType KmpInt32Ty,
3671                                             QualType KmpTaskTWithPrivatesPtrQTy,
3672                                             QualType KmpTaskTWithPrivatesQTy) {
3673   ASTContext &C = CGM.getContext();
3674   FunctionArgList Args;
3675   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3676                             ImplicitParamDecl::Other);
3677   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3678                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3679                                 ImplicitParamDecl::Other);
3680   Args.push_back(&GtidArg);
3681   Args.push_back(&TaskTypeArg);
3682   const auto &DestructorFnInfo =
3683       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3684   llvm::FunctionType *DestructorFnTy =
3685       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3686   std::string Name =
3687       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3688   auto *DestructorFn =
3689       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3690                              Name, &CGM.getModule());
3691   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3692                                     DestructorFnInfo);
3693   DestructorFn->setDoesNotRecurse();
3694   CodeGenFunction CGF(CGM);
3695   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3696                     Args, Loc, Loc);
3697 
3698   LValue Base = CGF.EmitLoadOfPointerLValue(
3699       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3700       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3701   const auto *KmpTaskTWithPrivatesQTyRD =
3702       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3703   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3704   Base = CGF.EmitLValueForField(Base, *FI);
3705   for (const auto *Field :
3706        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3707     if (QualType::DestructionKind DtorKind =
3708             Field->getType().isDestructedType()) {
3709       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3710       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3711     }
3712   }
3713   CGF.FinishFunction();
3714   return DestructorFn;
3715 }
3716 
3717 /// Emit a privates mapping function for correct handling of private and
3718 /// firstprivate variables.
3719 /// \code
3720 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3721 /// **noalias priv1,...,  <tyn> **noalias privn) {
3722 ///   *priv1 = &.privates.priv1;
3723 ///   ...;
3724 ///   *privn = &.privates.privn;
3725 /// }
3726 /// \endcode
3727 static llvm::Value *
3728 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3729                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3730                                ArrayRef<PrivateDataTy> Privates) {
3731   ASTContext &C = CGM.getContext();
3732   FunctionArgList Args;
3733   ImplicitParamDecl TaskPrivatesArg(
3734       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3735       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3736       ImplicitParamDecl::Other);
3737   Args.push_back(&TaskPrivatesArg);
3738   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3739   unsigned Counter = 1;
3740   for (const Expr *E : Data.PrivateVars) {
3741     Args.push_back(ImplicitParamDecl::Create(
3742         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3743         C.getPointerType(C.getPointerType(E->getType()))
3744             .withConst()
3745             .withRestrict(),
3746         ImplicitParamDecl::Other));
3747     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3748     PrivateVarsPos[VD] = Counter;
3749     ++Counter;
3750   }
3751   for (const Expr *E : Data.FirstprivateVars) {
3752     Args.push_back(ImplicitParamDecl::Create(
3753         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3754         C.getPointerType(C.getPointerType(E->getType()))
3755             .withConst()
3756             .withRestrict(),
3757         ImplicitParamDecl::Other));
3758     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3759     PrivateVarsPos[VD] = Counter;
3760     ++Counter;
3761   }
3762   for (const Expr *E : Data.LastprivateVars) {
3763     Args.push_back(ImplicitParamDecl::Create(
3764         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3765         C.getPointerType(C.getPointerType(E->getType()))
3766             .withConst()
3767             .withRestrict(),
3768         ImplicitParamDecl::Other));
3769     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3770     PrivateVarsPos[VD] = Counter;
3771     ++Counter;
3772   }
3773   for (const VarDecl *VD : Data.PrivateLocals) {
3774     QualType Ty = VD->getType().getNonReferenceType();
3775     if (VD->getType()->isLValueReferenceType())
3776       Ty = C.getPointerType(Ty);
3777     if (isAllocatableDecl(VD))
3778       Ty = C.getPointerType(Ty);
3779     Args.push_back(ImplicitParamDecl::Create(
3780         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3781         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3782         ImplicitParamDecl::Other));
3783     PrivateVarsPos[VD] = Counter;
3784     ++Counter;
3785   }
3786   const auto &TaskPrivatesMapFnInfo =
3787       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3788   llvm::FunctionType *TaskPrivatesMapTy =
3789       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3790   std::string Name =
3791       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3792   auto *TaskPrivatesMap = llvm::Function::Create(
3793       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3794       &CGM.getModule());
3795   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3796                                     TaskPrivatesMapFnInfo);
3797   if (CGM.getLangOpts().Optimize) {
3798     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3799     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3800     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3801   }
3802   CodeGenFunction CGF(CGM);
3803   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3804                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3805 
3806   // *privi = &.privates.privi;
3807   LValue Base = CGF.EmitLoadOfPointerLValue(
3808       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3809       TaskPrivatesArg.getType()->castAs<PointerType>());
3810   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3811   Counter = 0;
3812   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3813     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3814     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3815     LValue RefLVal =
3816         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3817     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3818         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3819     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3820     ++Counter;
3821   }
3822   CGF.FinishFunction();
3823   return TaskPrivatesMap;
3824 }
3825 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lockstep with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD) and emits the initializer of
/// each private copy into its field. Local privates are skipped.
///
/// \param CGF Function into which the initialization code is emitted.
/// \param D Directive whose 'task' or 'taskloop' captured statement provides
/// the capture information used while emitting initializers.
/// \param KmpTaskSharedsPtr Address of the shareds block; only used when a
/// source base is needed (see below) and may be invalid otherwise.
/// \param TDBase LValue of the task record whose privates are initialized.
/// \param KmpTaskTWithPrivatesQTyRD Record of the task-with-privates type.
/// \param SharedsTy Type of the shareds block.
/// \param SharedsPtrTy Pointer type the shareds address is cast to.
/// \param Data Task data; only FirstprivateVars is consulted here.
/// \param Privates Descriptions of the private copies, in field order.
/// \param ForDup When true, only private copies initialized by a non-trivial
/// constructor call are (re)initialized, and shared values are read through
/// \p KmpTaskSharedsPtr.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of the task record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Any taskloop-based directive uses the 'taskloop' captured region,
  // everything else the 'task' one.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  // Base lvalue of the shareds block; stays default-constructed unless one of
  // the conditions below holds.
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself and
  // is advanced once per entry of Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Without ForDup every private copy with an initializer is initialized;
    // with ForDup only those built by a non-trivial constructor call.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // An element-init helper means the initializer reads the original
      // (shared) value, so its lvalue has to be located first.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          // Not captured: the variable is expected to be one of the artificial
          // target data arrays and is addressed as a local variable.
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the shared value from the shareds block, re-wrapping the
          // field address with the declared alignment of the original
          // variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Variable captured by an enclosing lambda, or code emitted inside a
          // block: emit the original reference expression directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Bind the element-init helper to the current source
                  // element.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: bind the element-init helper to the shared value and
          // emit the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No element-init helper: the initializer is emitted as is.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3947 
3948 /// Check if duplication function is required for taskloops.
3949 static bool checkInitIsRequired(CodeGenFunction &CGF,
3950                                 ArrayRef<PrivateDataTy> Privates) {
3951   bool InitRequired = false;
3952   for (const PrivateDataTy &Pair : Privates) {
3953     if (Pair.second.isLocalPrivate())
3954       continue;
3955     const VarDecl *VD = Pair.second.PrivateCopy;
3956     const Expr *Init = VD->getAnyInitializer();
3957     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3958                                     !CGF.isTrivialInitializer(Init));
3959     if (InitRequired)
3960       break;
3961   }
3962   return InitRequired;
3963 }
3964 
3965 
3966 /// Emit task_dup function (for initialization of
3967 /// private/firstprivate/lastprivate vars and last_iter flag)
3968 /// \code
3969 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3970 /// lastpriv) {
3971 /// // setup lastprivate flag
3972 ///    task_dst->last = lastpriv;
3973 /// // could be constructor calls here...
3974 /// }
3975 /// \endcode
3976 static llvm::Value *
3977 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3978                     const OMPExecutableDirective &D,
3979                     QualType KmpTaskTWithPrivatesPtrQTy,
3980                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3981                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3982                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3983                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3984   ASTContext &C = CGM.getContext();
3985   FunctionArgList Args;
3986   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3987                            KmpTaskTWithPrivatesPtrQTy,
3988                            ImplicitParamDecl::Other);
3989   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3990                            KmpTaskTWithPrivatesPtrQTy,
3991                            ImplicitParamDecl::Other);
3992   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3993                                 ImplicitParamDecl::Other);
3994   Args.push_back(&DstArg);
3995   Args.push_back(&SrcArg);
3996   Args.push_back(&LastprivArg);
3997   const auto &TaskDupFnInfo =
3998       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3999   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4000   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4001   auto *TaskDup = llvm::Function::Create(
4002       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4003   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4004   TaskDup->setDoesNotRecurse();
4005   CodeGenFunction CGF(CGM);
4006   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4007                     Loc);
4008 
4009   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4010       CGF.GetAddrOfLocalVar(&DstArg),
4011       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4012   // task_dst->liter = lastpriv;
4013   if (WithLastIter) {
4014     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4015     LValue Base = CGF.EmitLValueForField(
4016         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4017     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4018     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4019         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4020     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4021   }
4022 
4023   // Emit initial values for private copies (if any).
4024   assert(!Privates.empty());
4025   Address KmpTaskSharedsPtr = Address::invalid();
4026   if (!Data.FirstprivateVars.empty()) {
4027     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4028         CGF.GetAddrOfLocalVar(&SrcArg),
4029         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4030     LValue Base = CGF.EmitLValueForField(
4031         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4032     KmpTaskSharedsPtr = Address(
4033         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4034                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4035                                                   KmpTaskTShareds)),
4036                              Loc),
4037         CGM.getNaturalTypeAlignment(SharedsTy));
4038   }
4039   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4040                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4041   CGF.FinishFunction();
4042   return TaskDup;
4043 }
4044 
4045 /// Checks if destructor function is required to be generated.
4046 /// \return true if cleanups are required, false otherwise.
4047 static bool
4048 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4049                          ArrayRef<PrivateDataTy> Privates) {
4050   for (const PrivateDataTy &P : Privates) {
4051     if (P.second.isLocalPrivate())
4052       continue;
4053     QualType Ty = P.second.Original->getType().getNonReferenceType();
4054     if (Ty.isDestructedType())
4055       return true;
4056   }
4057   return false;
4058 }
4059 
4060 namespace {
4061 /// Loop generator for OpenMP iterator expression.
4062 class OMPIteratorGeneratorScope final
4063     : public CodeGenFunction::OMPPrivateScope {
4064   CodeGenFunction &CGF;
4065   const OMPIteratorExpr *E = nullptr;
4066   SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
4067   SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
4068   OMPIteratorGeneratorScope() = delete;
4069   OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
4070 
4071 public:
4072   OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
4073       : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
4074     if (!E)
4075       return;
4076     SmallVector<llvm::Value *, 4> Uppers;
4077     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4078       Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
4079       const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
4080       addPrivate(VD, [&CGF, VD]() {
4081         return CGF.CreateMemTemp(VD->getType(), VD->getName());
4082       });
4083       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4084       addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
4085         return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
4086                                  "counter.addr");
4087       });
4088     }
4089     Privatize();
4090 
4091     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4092       const OMPIteratorHelperData &HelperData = E->getHelper(I);
4093       LValue CLVal =
4094           CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
4095                              HelperData.CounterVD->getType());
4096       // Counter = 0;
4097       CGF.EmitStoreOfScalar(
4098           llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
4099           CLVal);
4100       CodeGenFunction::JumpDest &ContDest =
4101           ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
4102       CodeGenFunction::JumpDest &ExitDest =
4103           ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
4104       // N = <number-of_iterations>;
4105       llvm::Value *N = Uppers[I];
4106       // cont:
4107       // if (Counter < N) goto body; else goto exit;
4108       CGF.EmitBlock(ContDest.getBlock());
4109       auto *CVal =
4110           CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
4111       llvm::Value *Cmp =
4112           HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
4113               ? CGF.Builder.CreateICmpSLT(CVal, N)
4114               : CGF.Builder.CreateICmpULT(CVal, N);
4115       llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4116       CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4117       // body:
4118       CGF.EmitBlock(BodyBB);
4119       // Iteri = Begini + Counter * Stepi;
4120       CGF.EmitIgnoredExpr(HelperData.Update);
4121     }
4122   }
4123   ~OMPIteratorGeneratorScope() {
4124     if (!E)
4125       return;
4126     for (unsigned I = E->numOfIterators(); I > 0; --I) {
4127       // Counter = Counter + 1;
4128       const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4129       CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4130       // goto cont;
4131       CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4132       // exit:
4133       CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4134     }
4135   }
4136 };
4137 } // namespace
4138 
4139 static std::pair<llvm::Value *, llvm::Value *>
4140 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4141   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4142   llvm::Value *Addr;
4143   if (OASE) {
4144     const Expr *Base = OASE->getBase();
4145     Addr = CGF.EmitScalarExpr(Base);
4146   } else {
4147     Addr = CGF.EmitLValue(E).getPointer(CGF);
4148   }
4149   llvm::Value *SizeVal;
4150   QualType Ty = E->getType();
4151   if (OASE) {
4152     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4153     for (const Expr *SE : OASE->getDimensions()) {
4154       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4155       Sz = CGF.EmitScalarConversion(
4156           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4157       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4158     }
4159   } else if (const auto *ASE =
4160                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4161     LValue UpAddrLVal =
4162         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4163     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4164     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4165         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4166     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4167     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4168     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4169   } else {
4170     SizeVal = CGF.getTypeSize(Ty);
4171   }
4172   return std::make_pair(Addr, SizeVal);
4173 }
4174 
4175 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4176 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4177   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4178   if (KmpTaskAffinityInfoTy.isNull()) {
4179     RecordDecl *KmpAffinityInfoRD =
4180         C.buildImplicitRecord("kmp_task_affinity_info_t");
4181     KmpAffinityInfoRD->startDefinition();
4182     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4183     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4184     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4185     KmpAffinityInfoRD->completeDefinition();
4186     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4187   }
4188 }
4189 
4190 CGOpenMPRuntime::TaskResultTy
4191 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4192                               const OMPExecutableDirective &D,
4193                               llvm::Function *TaskFunction, QualType SharedsTy,
4194                               Address Shareds, const OMPTaskDataTy &Data) {
4195   ASTContext &C = CGM.getContext();
4196   llvm::SmallVector<PrivateDataTy, 4> Privates;
4197   // Aggregate privates and sort them by the alignment.
4198   const auto *I = Data.PrivateCopies.begin();
4199   for (const Expr *E : Data.PrivateVars) {
4200     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4201     Privates.emplace_back(
4202         C.getDeclAlign(VD),
4203         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4204                          /*PrivateElemInit=*/nullptr));
4205     ++I;
4206   }
4207   I = Data.FirstprivateCopies.begin();
4208   const auto *IElemInitRef = Data.FirstprivateInits.begin();
4209   for (const Expr *E : Data.FirstprivateVars) {
4210     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4211     Privates.emplace_back(
4212         C.getDeclAlign(VD),
4213         PrivateHelpersTy(
4214             E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4215             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4216     ++I;
4217     ++IElemInitRef;
4218   }
4219   I = Data.LastprivateCopies.begin();
4220   for (const Expr *E : Data.LastprivateVars) {
4221     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4222     Privates.emplace_back(
4223         C.getDeclAlign(VD),
4224         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4225                          /*PrivateElemInit=*/nullptr));
4226     ++I;
4227   }
4228   for (const VarDecl *VD : Data.PrivateLocals) {
4229     if (isAllocatableDecl(VD))
4230       Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
4231     else
4232       Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
4233   }
4234   llvm::stable_sort(Privates,
4235                     [](const PrivateDataTy &L, const PrivateDataTy &R) {
4236                       return L.first > R.first;
4237                     });
4238   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4239   // Build type kmp_routine_entry_t (if not built yet).
4240   emitKmpRoutineEntryT(KmpInt32Ty);
4241   // Build type kmp_task_t (if not built yet).
4242   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4243     if (SavedKmpTaskloopTQTy.isNull()) {
4244       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4245           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4246     }
4247     KmpTaskTQTy = SavedKmpTaskloopTQTy;
4248   } else {
4249     assert((D.getDirectiveKind() == OMPD_task ||
4250             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4251             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4252            "Expected taskloop, task or target directive");
4253     if (SavedKmpTaskTQTy.isNull()) {
4254       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4255           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4256     }
4257     KmpTaskTQTy = SavedKmpTaskTQTy;
4258   }
4259   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4260   // Build particular struct kmp_task_t for the given task.
4261   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4262       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4263   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4264   QualType KmpTaskTWithPrivatesPtrQTy =
4265       C.getPointerType(KmpTaskTWithPrivatesQTy);
4266   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4267   llvm::Type *KmpTaskTWithPrivatesPtrTy =
4268       KmpTaskTWithPrivatesTy->getPointerTo();
4269   llvm::Value *KmpTaskTWithPrivatesTySize =
4270       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4271   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4272 
4273   // Emit initial values for private copies (if any).
4274   llvm::Value *TaskPrivatesMap = nullptr;
4275   llvm::Type *TaskPrivatesMapTy =
4276       std::next(TaskFunction->arg_begin(), 3)->getType();
4277   if (!Privates.empty()) {
4278     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4279     TaskPrivatesMap =
4280         emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
4281     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4282         TaskPrivatesMap, TaskPrivatesMapTy);
4283   } else {
4284     TaskPrivatesMap = llvm::ConstantPointerNull::get(
4285         cast<llvm::PointerType>(TaskPrivatesMapTy));
4286   }
4287   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4288   // kmp_task_t *tt);
4289   llvm::Function *TaskEntry = emitProxyTaskFunction(
4290       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4291       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4292       TaskPrivatesMap);
4293 
4294   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4295   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4296   // kmp_routine_entry_t *task_entry);
4297   // Task flags. Format is taken from
4298   // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
4299   // description of kmp_tasking_flags struct.
4300   enum {
4301     TiedFlag = 0x1,
4302     FinalFlag = 0x2,
4303     DestructorsFlag = 0x8,
4304     PriorityFlag = 0x20,
4305     DetachableFlag = 0x40,
4306   };
4307   unsigned Flags = Data.Tied ? TiedFlag : 0;
4308   bool NeedsCleanup = false;
4309   if (!Privates.empty()) {
4310     NeedsCleanup =
4311         checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
4312     if (NeedsCleanup)
4313       Flags = Flags | DestructorsFlag;
4314   }
4315   if (Data.Priority.getInt())
4316     Flags = Flags | PriorityFlag;
4317   if (D.hasClausesOfKind<OMPDetachClause>())
4318     Flags = Flags | DetachableFlag;
4319   llvm::Value *TaskFlags =
4320       Data.Final.getPointer()
4321           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4322                                      CGF.Builder.getInt32(FinalFlag),
4323                                      CGF.Builder.getInt32(/*C=*/0))
4324           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4325   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4326   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4327   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4328       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4329       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4330           TaskEntry, KmpRoutineEntryPtrTy)};
4331   llvm::Value *NewTask;
4332   if (D.hasClausesOfKind<OMPNowaitClause>()) {
4333     // Check if we have any device clause associated with the directive.
4334     const Expr *Device = nullptr;
4335     if (auto *C = D.getSingleClause<OMPDeviceClause>())
4336       Device = C->getDevice();
4337     // Emit device ID if any otherwise use default value.
4338     llvm::Value *DeviceID;
4339     if (Device)
4340       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4341                                            CGF.Int64Ty, /*isSigned=*/true);
4342     else
4343       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4344     AllocArgs.push_back(DeviceID);
4345     NewTask = CGF.EmitRuntimeCall(
4346         OMPBuilder.getOrCreateRuntimeFunction(
4347             CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4348         AllocArgs);
4349   } else {
4350     NewTask =
4351         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4352                                 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4353                             AllocArgs);
4354   }
4355   // Emit detach clause initialization.
4356   // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
4357   // task_descriptor);
4358   if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
4359     const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
4360     LValue EvtLVal = CGF.EmitLValue(Evt);
4361 
4362     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
4363     // int gtid, kmp_task_t *task);
4364     llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
4365     llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
4366     Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
4367     llvm::Value *EvtVal = CGF.EmitRuntimeCall(
4368         OMPBuilder.getOrCreateRuntimeFunction(
4369             CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
4370         {Loc, Tid, NewTask});
4371     EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
4372                                       Evt->getExprLoc());
4373     CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
4374   }
4375   // Process affinity clauses.
4376   if (D.hasClausesOfKind<OMPAffinityClause>()) {
4377     // Process list of affinity data.
4378     ASTContext &C = CGM.getContext();
4379     Address AffinitiesArray = Address::invalid();
4380     // Calculate number of elements to form the array of affinity data.
4381     llvm::Value *NumOfElements = nullptr;
4382     unsigned NumAffinities = 0;
4383     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4384       if (const Expr *Modifier = C->getModifier()) {
4385         const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4386         for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4387           llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4388           Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4389           NumOfElements =
4390               NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4391         }
4392       } else {
4393         NumAffinities += C->varlist_size();
4394       }
4395     }
4396     getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4397     // Fields ids in kmp_task_affinity_info record.
4398     enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4399 
4400     QualType KmpTaskAffinityInfoArrayTy;
4401     if (NumOfElements) {
4402       NumOfElements = CGF.Builder.CreateNUWAdd(
4403           llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4404       auto *OVE = new (C) OpaqueValueExpr(
4405           Loc,
4406           C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4407           VK_PRValue);
4408       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4409                                                     RValue::get(NumOfElements));
4410       KmpTaskAffinityInfoArrayTy =
4411           C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
4412                                  /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4413       // Properly emit variable-sized array.
4414       auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4415                                            ImplicitParamDecl::Other);
4416       CGF.EmitVarDecl(*PD);
4417       AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4418       NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4419                                                 /*isSigned=*/false);
4420     } else {
4421       KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4422           KmpTaskAffinityInfoTy,
4423           llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4424           ArrayType::Normal, /*IndexTypeQuals=*/0);
4425       AffinitiesArray =
4426           CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4427       AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4428       NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4429                                              /*isSigned=*/false);
4430     }
4431 
4432     const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4433     // Fill array by elements without iterators.
4434     unsigned Pos = 0;
4435     bool HasIterator = false;
4436     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4437       if (C->getModifier()) {
4438         HasIterator = true;
4439         continue;
4440       }
4441       for (const Expr *E : C->varlists()) {
4442         llvm::Value *Addr;
4443         llvm::Value *Size;
4444         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4445         LValue Base =
4446             CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4447                                KmpTaskAffinityInfoTy);
4448         // affs[i].base_addr = &<Affinities[i].second>;
4449         LValue BaseAddrLVal = CGF.EmitLValueForField(
4450             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4451         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4452                               BaseAddrLVal);
4453         // affs[i].len = sizeof(<Affinities[i].second>);
4454         LValue LenLVal = CGF.EmitLValueForField(
4455             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4456         CGF.EmitStoreOfScalar(Size, LenLVal);
4457         ++Pos;
4458       }
4459     }
4460     LValue PosLVal;
4461     if (HasIterator) {
4462       PosLVal = CGF.MakeAddrLValue(
4463           CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4464           C.getSizeType());
4465       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4466     }
4467     // Process elements with iterators.
4468     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4469       const Expr *Modifier = C->getModifier();
4470       if (!Modifier)
4471         continue;
4472       OMPIteratorGeneratorScope IteratorScope(
4473           CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4474       for (const Expr *E : C->varlists()) {
4475         llvm::Value *Addr;
4476         llvm::Value *Size;
4477         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4478         llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4479         LValue Base = CGF.MakeAddrLValue(
4480             Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(),
4481                                           AffinitiesArray.getPointer(), Idx),
4482                     AffinitiesArray.getAlignment()),
4483             KmpTaskAffinityInfoTy);
4484         // affs[i].base_addr = &<Affinities[i].second>;
4485         LValue BaseAddrLVal = CGF.EmitLValueForField(
4486             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4487         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4488                               BaseAddrLVal);
4489         // affs[i].len = sizeof(<Affinities[i].second>);
4490         LValue LenLVal = CGF.EmitLValueForField(
4491             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4492         CGF.EmitStoreOfScalar(Size, LenLVal);
4493         Idx = CGF.Builder.CreateNUWAdd(
4494             Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4495         CGF.EmitStoreOfScalar(Idx, PosLVal);
4496       }
4497     }
4498     // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4499     // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4500     // naffins, kmp_task_affinity_info_t *affin_list);
4501     llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4502     llvm::Value *GTid = getThreadID(CGF, Loc);
4503     llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4504         AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4505     // FIXME: Emit the function and ignore its result for now unless the
4506     // runtime function is properly implemented.
4507     (void)CGF.EmitRuntimeCall(
4508         OMPBuilder.getOrCreateRuntimeFunction(
4509             CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4510         {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4511   }
4512   llvm::Value *NewTaskNewTaskTTy =
4513       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4514           NewTask, KmpTaskTWithPrivatesPtrTy);
4515   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4516                                                KmpTaskTWithPrivatesQTy);
4517   LValue TDBase =
4518       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4519   // Fill the data in the resulting kmp_task_t record.
4520   // Copy shareds if there are any.
4521   Address KmpTaskSharedsPtr = Address::invalid();
4522   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4523     KmpTaskSharedsPtr =
4524         Address(CGF.EmitLoadOfScalar(
4525                     CGF.EmitLValueForField(
4526                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4527                                            KmpTaskTShareds)),
4528                     Loc),
4529                 CGM.getNaturalTypeAlignment(SharedsTy));
4530     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4531     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4532     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4533   }
4534   // Emit initial values for private copies (if any).
4535   TaskResultTy Result;
4536   if (!Privates.empty()) {
4537     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4538                      SharedsTy, SharedsPtrTy, Data, Privates,
4539                      /*ForDup=*/false);
4540     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4541         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4542       Result.TaskDupFn = emitTaskDupFunction(
4543           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4544           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4545           /*WithLastIter=*/!Data.LastprivateVars.empty());
4546     }
4547   }
4548   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4549   enum { Priority = 0, Destructors = 1 };
4550   // Provide pointer to function with destructors for privates.
4551   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4552   const RecordDecl *KmpCmplrdataUD =
4553       (*FI)->getType()->getAsUnionType()->getDecl();
4554   if (NeedsCleanup) {
4555     llvm::Value *DestructorFn = emitDestructorsFunction(
4556         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4557         KmpTaskTWithPrivatesQTy);
4558     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4559     LValue DestructorsLV = CGF.EmitLValueForField(
4560         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4561     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4562                               DestructorFn, KmpRoutineEntryPtrTy),
4563                           DestructorsLV);
4564   }
4565   // Set priority.
4566   if (Data.Priority.getInt()) {
4567     LValue Data2LV = CGF.EmitLValueForField(
4568         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4569     LValue PriorityLV = CGF.EmitLValueForField(
4570         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4571     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4572   }
4573   Result.NewTask = NewTask;
4574   Result.TaskEntry = TaskEntry;
4575   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4576   Result.TDBase = TDBase;
4577   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4578   return Result;
4579 }
4580 
namespace {
/// Dependence kinds in the encoding that is stored into the flags field of a
/// dependence record.
enum RTLDependenceKindTy {
  DepIn = 0x1,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Positions of the fields inside the kmp_depend_info record.
enum RTLDependInfoFieldsTy {
  BaseAddr = 0,
  Len = 1,
  Flags = 2
};
} // namespace
4591 
4592 /// Translates internal dependency kind into the runtime kind.
4593 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4594   RTLDependenceKindTy DepKind;
4595   switch (K) {
4596   case OMPC_DEPEND_in:
4597     DepKind = DepIn;
4598     break;
4599   // Out and InOut dependencies must use the same code.
4600   case OMPC_DEPEND_out:
4601   case OMPC_DEPEND_inout:
4602     DepKind = DepInOut;
4603     break;
4604   case OMPC_DEPEND_mutexinoutset:
4605     DepKind = DepMutexInOutSet;
4606     break;
4607   case OMPC_DEPEND_source:
4608   case OMPC_DEPEND_sink:
4609   case OMPC_DEPEND_depobj:
4610   case OMPC_DEPEND_unknown:
4611     llvm_unreachable("Unknown task dependence type");
4612   }
4613   return DepKind;
4614 }
4615 
4616 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4617 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4618                            QualType &FlagsTy) {
4619   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4620   if (KmpDependInfoTy.isNull()) {
4621     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4622     KmpDependInfoRD->startDefinition();
4623     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4624     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4625     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4626     KmpDependInfoRD->completeDefinition();
4627     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4628   }
4629 }
4630 
4631 std::pair<llvm::Value *, LValue>
4632 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4633                                    SourceLocation Loc) {
4634   ASTContext &C = CGM.getContext();
4635   QualType FlagsTy;
4636   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4637   RecordDecl *KmpDependInfoRD =
4638       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4639   LValue Base = CGF.EmitLoadOfPointerLValue(
4640       DepobjLVal.getAddress(CGF),
4641       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4642   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4643   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4644           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4645   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4646                             Base.getTBAAInfo());
4647   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4648       Addr.getElementType(), Addr.getPointer(),
4649       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4650   LValue NumDepsBase = CGF.MakeAddrLValue(
4651       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4652       Base.getBaseInfo(), Base.getTBAAInfo());
4653   // NumDeps = deps[i].base_addr;
4654   LValue BaseAddrLVal = CGF.EmitLValueForField(
4655       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4656   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4657   return std::make_pair(NumDeps, Base);
4658 }
4659 
4660 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4661                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4662                            const OMPTaskDataTy::DependData &Data,
4663                            Address DependenciesArray) {
4664   CodeGenModule &CGM = CGF.CGM;
4665   ASTContext &C = CGM.getContext();
4666   QualType FlagsTy;
4667   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4668   RecordDecl *KmpDependInfoRD =
4669       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4670   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4671 
4672   OMPIteratorGeneratorScope IteratorScope(
4673       CGF, cast_or_null<OMPIteratorExpr>(
4674                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4675                                  : nullptr));
4676   for (const Expr *E : Data.DepExprs) {
4677     llvm::Value *Addr;
4678     llvm::Value *Size;
4679     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4680     LValue Base;
4681     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4682       Base = CGF.MakeAddrLValue(
4683           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4684     } else {
4685       LValue &PosLVal = *Pos.get<LValue *>();
4686       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4687       Base = CGF.MakeAddrLValue(
4688           Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
4689                                         DependenciesArray.getPointer(), Idx),
4690                   DependenciesArray.getAlignment()),
4691           KmpDependInfoTy);
4692     }
4693     // deps[i].base_addr = &<Dependencies[i].second>;
4694     LValue BaseAddrLVal = CGF.EmitLValueForField(
4695         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4696     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4697                           BaseAddrLVal);
4698     // deps[i].len = sizeof(<Dependencies[i].second>);
4699     LValue LenLVal = CGF.EmitLValueForField(
4700         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4701     CGF.EmitStoreOfScalar(Size, LenLVal);
4702     // deps[i].flags = <Dependencies[i].first>;
4703     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4704     LValue FlagsLVal = CGF.EmitLValueForField(
4705         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4706     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4707                           FlagsLVal);
4708     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4709       ++(*P);
4710     } else {
4711       LValue &PosLVal = *Pos.get<LValue *>();
4712       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4713       Idx = CGF.Builder.CreateNUWAdd(Idx,
4714                                      llvm::ConstantInt::get(Idx->getType(), 1));
4715       CGF.EmitStoreOfScalar(Idx, PosLVal);
4716     }
4717   }
4718 }
4719 
4720 static SmallVector<llvm::Value *, 4>
4721 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4722                         const OMPTaskDataTy::DependData &Data) {
4723   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4724          "Expected depobj dependecy kind.");
4725   SmallVector<llvm::Value *, 4> Sizes;
4726   SmallVector<LValue, 4> SizeLVals;
4727   ASTContext &C = CGF.getContext();
4728   QualType FlagsTy;
4729   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4730   RecordDecl *KmpDependInfoRD =
4731       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4732   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4733   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4734   {
4735     OMPIteratorGeneratorScope IteratorScope(
4736         CGF, cast_or_null<OMPIteratorExpr>(
4737                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4738                                    : nullptr));
4739     for (const Expr *E : Data.DepExprs) {
4740       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4741       LValue Base = CGF.EmitLoadOfPointerLValue(
4742           DepobjLVal.getAddress(CGF),
4743           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4744       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4745           Base.getAddress(CGF), KmpDependInfoPtrT);
4746       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4747                                 Base.getTBAAInfo());
4748       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4749           Addr.getElementType(), Addr.getPointer(),
4750           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4751       LValue NumDepsBase = CGF.MakeAddrLValue(
4752           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4753           Base.getBaseInfo(), Base.getTBAAInfo());
4754       // NumDeps = deps[i].base_addr;
4755       LValue BaseAddrLVal = CGF.EmitLValueForField(
4756           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4757       llvm::Value *NumDeps =
4758           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4759       LValue NumLVal = CGF.MakeAddrLValue(
4760           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4761           C.getUIntPtrType());
4762       CGF.InitTempAlloca(NumLVal.getAddress(CGF),
4763                          llvm::ConstantInt::get(CGF.IntPtrTy, 0));
4764       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4765       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4766       CGF.EmitStoreOfScalar(Add, NumLVal);
4767       SizeLVals.push_back(NumLVal);
4768     }
4769   }
4770   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4771     llvm::Value *Size =
4772         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4773     Sizes.push_back(Size);
4774   }
4775   return Sizes;
4776 }
4777 
4778 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4779                                LValue PosLVal,
4780                                const OMPTaskDataTy::DependData &Data,
4781                                Address DependenciesArray) {
4782   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4783          "Expected depobj dependecy kind.");
4784   ASTContext &C = CGF.getContext();
4785   QualType FlagsTy;
4786   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4787   RecordDecl *KmpDependInfoRD =
4788       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4789   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4790   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4791   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4792   {
4793     OMPIteratorGeneratorScope IteratorScope(
4794         CGF, cast_or_null<OMPIteratorExpr>(
4795                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4796                                    : nullptr));
4797     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4798       const Expr *E = Data.DepExprs[I];
4799       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4800       LValue Base = CGF.EmitLoadOfPointerLValue(
4801           DepobjLVal.getAddress(CGF),
4802           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4803       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4804           Base.getAddress(CGF), KmpDependInfoPtrT);
4805       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4806                                 Base.getTBAAInfo());
4807 
4808       // Get number of elements in a single depobj.
4809       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4810           Addr.getElementType(), Addr.getPointer(),
4811           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4812       LValue NumDepsBase = CGF.MakeAddrLValue(
4813           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4814           Base.getBaseInfo(), Base.getTBAAInfo());
4815       // NumDeps = deps[i].base_addr;
4816       LValue BaseAddrLVal = CGF.EmitLValueForField(
4817           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4818       llvm::Value *NumDeps =
4819           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4820 
4821       // memcopy dependency data.
4822       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4823           ElSize,
4824           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4825       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4826       Address DepAddr =
4827           Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
4828                                         DependenciesArray.getPointer(), Pos),
4829                   DependenciesArray.getAlignment());
4830       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4831 
4832       // Increase pos.
4833       // pos += size;
4834       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4835       CGF.EmitStoreOfScalar(Add, PosLVal);
4836     }
4837   }
4838 }
4839 
4840 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4841     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4842     SourceLocation Loc) {
4843   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4844         return D.DepExprs.empty();
4845       }))
4846     return std::make_pair(nullptr, Address::invalid());
4847   // Process list of dependencies.
4848   ASTContext &C = CGM.getContext();
4849   Address DependenciesArray = Address::invalid();
4850   llvm::Value *NumOfElements = nullptr;
4851   unsigned NumDependencies = std::accumulate(
4852       Dependencies.begin(), Dependencies.end(), 0,
4853       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4854         return D.DepKind == OMPC_DEPEND_depobj
4855                    ? V
4856                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4857       });
4858   QualType FlagsTy;
4859   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4860   bool HasDepobjDeps = false;
4861   bool HasRegularWithIterators = false;
4862   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4863   llvm::Value *NumOfRegularWithIterators =
4864       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4865   // Calculate number of depobj dependecies and regular deps with the iterators.
4866   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4867     if (D.DepKind == OMPC_DEPEND_depobj) {
4868       SmallVector<llvm::Value *, 4> Sizes =
4869           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4870       for (llvm::Value *Size : Sizes) {
4871         NumOfDepobjElements =
4872             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4873       }
4874       HasDepobjDeps = true;
4875       continue;
4876     }
4877     // Include number of iterations, if any.
4878     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4879       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4880         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4881         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4882         NumOfRegularWithIterators =
4883             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4884       }
4885       HasRegularWithIterators = true;
4886       continue;
4887     }
4888   }
4889 
4890   QualType KmpDependInfoArrayTy;
4891   if (HasDepobjDeps || HasRegularWithIterators) {
4892     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4893                                            /*isSigned=*/false);
4894     if (HasDepobjDeps) {
4895       NumOfElements =
4896           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4897     }
4898     if (HasRegularWithIterators) {
4899       NumOfElements =
4900           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4901     }
4902     auto *OVE = new (C) OpaqueValueExpr(
4903         Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4904         VK_PRValue);
4905     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4906                                                   RValue::get(NumOfElements));
4907     KmpDependInfoArrayTy =
4908         C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4909                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4910     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4911     // Properly emit variable-sized array.
4912     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4913                                          ImplicitParamDecl::Other);
4914     CGF.EmitVarDecl(*PD);
4915     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4916     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4917                                               /*isSigned=*/false);
4918   } else {
4919     KmpDependInfoArrayTy = C.getConstantArrayType(
4920         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4921         ArrayType::Normal, /*IndexTypeQuals=*/0);
4922     DependenciesArray =
4923         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4924     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4925     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4926                                            /*isSigned=*/false);
4927   }
4928   unsigned Pos = 0;
4929   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4930     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4931         Dependencies[I].IteratorExpr)
4932       continue;
4933     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4934                    DependenciesArray);
4935   }
4936   // Copy regular dependecies with iterators.
4937   LValue PosLVal = CGF.MakeAddrLValue(
4938       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4939   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4940   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4941     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4942         !Dependencies[I].IteratorExpr)
4943       continue;
4944     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4945                    DependenciesArray);
4946   }
4947   // Copy final depobj arrays without iterators.
4948   if (HasDepobjDeps) {
4949     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4950       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4951         continue;
4952       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4953                          DependenciesArray);
4954     }
4955   }
4956   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4957       DependenciesArray, CGF.VoidPtrTy);
4958   return std::make_pair(NumOfElements, DependenciesArray);
4959 }
4960 
4961 Address CGOpenMPRuntime::emitDepobjDependClause(
4962     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4963     SourceLocation Loc) {
4964   if (Dependencies.DepExprs.empty())
4965     return Address::invalid();
4966   // Process list of dependencies.
4967   ASTContext &C = CGM.getContext();
4968   Address DependenciesArray = Address::invalid();
4969   unsigned NumDependencies = Dependencies.DepExprs.size();
4970   QualType FlagsTy;
4971   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4972   RecordDecl *KmpDependInfoRD =
4973       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4974 
4975   llvm::Value *Size;
4976   // Define type kmp_depend_info[<Dependencies.size()>];
4977   // For depobj reserve one extra element to store the number of elements.
4978   // It is required to handle depobj(x) update(in) construct.
4979   // kmp_depend_info[<Dependencies.size()>] deps;
4980   llvm::Value *NumDepsVal;
4981   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4982   if (const auto *IE =
4983           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4984     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4985     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4986       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4987       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4988       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4989     }
4990     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4991                                     NumDepsVal);
4992     CharUnits SizeInBytes =
4993         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4994     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4995     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4996     NumDepsVal =
4997         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4998   } else {
4999     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5000         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
5001         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5002     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
5003     Size = CGM.getSize(Sz.alignTo(Align));
5004     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
5005   }
5006   // Need to allocate on the dynamic memory.
5007   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5008   // Use default allocator.
5009   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5010   llvm::Value *Args[] = {ThreadID, Size, Allocator};
5011 
5012   llvm::Value *Addr =
5013       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5014                               CGM.getModule(), OMPRTL___kmpc_alloc),
5015                           Args, ".dep.arr.addr");
5016   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5017       Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
5018   DependenciesArray = Address(Addr, Align);
5019   // Write number of elements in the first element of array for depobj.
5020   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
5021   // deps[i].base_addr = NumDependencies;
5022   LValue BaseAddrLVal = CGF.EmitLValueForField(
5023       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5024   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
5025   llvm::PointerUnion<unsigned *, LValue *> Pos;
5026   unsigned Idx = 1;
5027   LValue PosLVal;
5028   if (Dependencies.IteratorExpr) {
5029     PosLVal = CGF.MakeAddrLValue(
5030         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
5031         C.getSizeType());
5032     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
5033                           /*IsInit=*/true);
5034     Pos = &PosLVal;
5035   } else {
5036     Pos = &Idx;
5037   }
5038   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
5039   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5040       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
5041   return DependenciesArray;
5042 }
5043 
5044 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5045                                         SourceLocation Loc) {
5046   ASTContext &C = CGM.getContext();
5047   QualType FlagsTy;
5048   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5049   LValue Base = CGF.EmitLoadOfPointerLValue(
5050       DepobjLVal.getAddress(CGF),
5051       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5052   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5053   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5054       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5055   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5056       Addr.getElementType(), Addr.getPointer(),
5057       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5058   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5059                                                                CGF.VoidPtrTy);
5060   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5061   // Use default allocator.
5062   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5063   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5064 
5065   // _kmpc_free(gtid, addr, nullptr);
5066   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5067                                 CGM.getModule(), OMPRTL___kmpc_free),
5068                             Args);
5069 }
5070 
5071 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5072                                        OpenMPDependClauseKind NewDepKind,
5073                                        SourceLocation Loc) {
5074   ASTContext &C = CGM.getContext();
5075   QualType FlagsTy;
5076   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5077   RecordDecl *KmpDependInfoRD =
5078       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5079   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5080   llvm::Value *NumDeps;
5081   LValue Base;
5082   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5083 
5084   Address Begin = Base.getAddress(CGF);
5085   // Cast from pointer to array type to pointer to single element.
5086   llvm::Value *End = CGF.Builder.CreateGEP(
5087       Begin.getElementType(), Begin.getPointer(), NumDeps);
5088   // The basic structure here is a while-do loop.
5089   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5090   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5091   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5092   CGF.EmitBlock(BodyBB);
5093   llvm::PHINode *ElementPHI =
5094       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5095   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5096   Begin = Address(ElementPHI, Begin.getAlignment());
5097   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5098                             Base.getTBAAInfo());
5099   // deps[i].flags = NewDepKind;
5100   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5101   LValue FlagsLVal = CGF.EmitLValueForField(
5102       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5103   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5104                         FlagsLVal);
5105 
5106   // Shift the address forward by one element.
5107   Address ElementNext =
5108       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5109   ElementPHI->addIncoming(ElementNext.getPointer(),
5110                           CGF.Builder.GetInsertBlock());
5111   llvm::Value *IsEmpty =
5112       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5113   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5114   // Done.
5115   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5116 }
5117 
5118 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5119                                    const OMPExecutableDirective &D,
5120                                    llvm::Function *TaskFunction,
5121                                    QualType SharedsTy, Address Shareds,
5122                                    const Expr *IfCond,
5123                                    const OMPTaskDataTy &Data) {
5124   if (!CGF.HaveInsertPoint())
5125     return;
5126 
5127   TaskResultTy Result =
5128       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5129   llvm::Value *NewTask = Result.NewTask;
5130   llvm::Function *TaskEntry = Result.TaskEntry;
5131   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5132   LValue TDBase = Result.TDBase;
5133   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5134   // Process list of dependences.
5135   Address DependenciesArray = Address::invalid();
5136   llvm::Value *NumOfElements;
5137   std::tie(NumOfElements, DependenciesArray) =
5138       emitDependClause(CGF, Data.Dependences, Loc);
5139 
5140   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5141   // libcall.
5142   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5143   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5144   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5145   // list is not empty
5146   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5147   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5148   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5149   llvm::Value *DepTaskArgs[7];
5150   if (!Data.Dependences.empty()) {
5151     DepTaskArgs[0] = UpLoc;
5152     DepTaskArgs[1] = ThreadID;
5153     DepTaskArgs[2] = NewTask;
5154     DepTaskArgs[3] = NumOfElements;
5155     DepTaskArgs[4] = DependenciesArray.getPointer();
5156     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5157     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5158   }
5159   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5160                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5161     if (!Data.Tied) {
5162       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5163       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5164       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5165     }
5166     if (!Data.Dependences.empty()) {
5167       CGF.EmitRuntimeCall(
5168           OMPBuilder.getOrCreateRuntimeFunction(
5169               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5170           DepTaskArgs);
5171     } else {
5172       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5173                               CGM.getModule(), OMPRTL___kmpc_omp_task),
5174                           TaskArgs);
5175     }
5176     // Check if parent region is untied and build return for untied task;
5177     if (auto *Region =
5178             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5179       Region->emitUntiedSwitch(CGF);
5180   };
5181 
5182   llvm::Value *DepWaitTaskArgs[6];
5183   if (!Data.Dependences.empty()) {
5184     DepWaitTaskArgs[0] = UpLoc;
5185     DepWaitTaskArgs[1] = ThreadID;
5186     DepWaitTaskArgs[2] = NumOfElements;
5187     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5188     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5189     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5190   }
5191   auto &M = CGM.getModule();
5192   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5193                         TaskEntry, &Data, &DepWaitTaskArgs,
5194                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5195     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5196     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5197     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5198     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5199     // is specified.
5200     if (!Data.Dependences.empty())
5201       CGF.EmitRuntimeCall(
5202           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5203           DepWaitTaskArgs);
5204     // Call proxy_task_entry(gtid, new_task);
5205     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5206                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5207       Action.Enter(CGF);
5208       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5209       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5210                                                           OutlinedFnArgs);
5211     };
5212 
5213     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5214     // kmp_task_t *new_task);
5215     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5216     // kmp_task_t *new_task);
5217     RegionCodeGenTy RCG(CodeGen);
5218     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5219                               M, OMPRTL___kmpc_omp_task_begin_if0),
5220                           TaskArgs,
5221                           OMPBuilder.getOrCreateRuntimeFunction(
5222                               M, OMPRTL___kmpc_omp_task_complete_if0),
5223                           TaskArgs);
5224     RCG.setAction(Action);
5225     RCG(CGF);
5226   };
5227 
5228   if (IfCond) {
5229     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5230   } else {
5231     RegionCodeGenTy ThenRCG(ThenCodeGen);
5232     ThenRCG(CGF);
5233   }
5234 }
5235 
5236 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5237                                        const OMPLoopDirective &D,
5238                                        llvm::Function *TaskFunction,
5239                                        QualType SharedsTy, Address Shareds,
5240                                        const Expr *IfCond,
5241                                        const OMPTaskDataTy &Data) {
5242   if (!CGF.HaveInsertPoint())
5243     return;
5244   TaskResultTy Result =
5245       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5246   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5247   // libcall.
5248   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5249   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5250   // sched, kmp_uint64 grainsize, void *task_dup);
5251   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5252   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5253   llvm::Value *IfVal;
5254   if (IfCond) {
5255     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5256                                       /*isSigned=*/true);
5257   } else {
5258     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5259   }
5260 
5261   LValue LBLVal = CGF.EmitLValueForField(
5262       Result.TDBase,
5263       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5264   const auto *LBVar =
5265       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5266   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5267                        LBLVal.getQuals(),
5268                        /*IsInitializer=*/true);
5269   LValue UBLVal = CGF.EmitLValueForField(
5270       Result.TDBase,
5271       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5272   const auto *UBVar =
5273       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5274   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5275                        UBLVal.getQuals(),
5276                        /*IsInitializer=*/true);
5277   LValue StLVal = CGF.EmitLValueForField(
5278       Result.TDBase,
5279       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5280   const auto *StVar =
5281       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5282   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5283                        StLVal.getQuals(),
5284                        /*IsInitializer=*/true);
5285   // Store reductions address.
5286   LValue RedLVal = CGF.EmitLValueForField(
5287       Result.TDBase,
5288       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5289   if (Data.Reductions) {
5290     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5291   } else {
5292     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5293                                CGF.getContext().VoidPtrTy);
5294   }
5295   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5296   llvm::Value *TaskArgs[] = {
5297       UpLoc,
5298       ThreadID,
5299       Result.NewTask,
5300       IfVal,
5301       LBLVal.getPointer(CGF),
5302       UBLVal.getPointer(CGF),
5303       CGF.EmitLoadOfScalar(StLVal, Loc),
5304       llvm::ConstantInt::getSigned(
5305           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5306       llvm::ConstantInt::getSigned(
5307           CGF.IntTy, Data.Schedule.getPointer()
5308                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5309                          : NoSchedule),
5310       Data.Schedule.getPointer()
5311           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5312                                       /*isSigned=*/false)
5313           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5314       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5315                              Result.TaskDupFn, CGF.VoidPtrTy)
5316                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5317   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5318                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5319                       TaskArgs);
5320 }
5321 
5322 /// Emit reduction operation for each element of array (required for
5323 /// array sections) LHS op = RHS.
5324 /// \param Type Type of array.
5325 /// \param LHSVar Variable on the left side of the reduction operation
5326 /// (references element of array in original variable).
5327 /// \param RHSVar Variable on the right side of the reduction operation
5328 /// (references element of array in original variable).
5329 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5330 /// RHSVar.
5331 static void EmitOMPAggregateReduction(
5332     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5333     const VarDecl *RHSVar,
5334     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5335                                   const Expr *, const Expr *)> &RedOpGen,
5336     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5337     const Expr *UpExpr = nullptr) {
5338   // Perform element-by-element initialization.
5339   QualType ElementTy;
5340   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5341   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5342 
5343   // Drill down to the base element type on both arrays.
5344   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5345   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5346 
5347   llvm::Value *RHSBegin = RHSAddr.getPointer();
5348   llvm::Value *LHSBegin = LHSAddr.getPointer();
5349   // Cast from pointer to array type to pointer to single element.
5350   llvm::Value *LHSEnd =
5351       CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
5352   // The basic structure here is a while-do loop.
5353   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5354   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5355   llvm::Value *IsEmpty =
5356       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5357   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5358 
5359   // Enter the loop body, making that address the current address.
5360   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5361   CGF.EmitBlock(BodyBB);
5362 
5363   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5364 
5365   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5366       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5367   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5368   Address RHSElementCurrent =
5369       Address(RHSElementPHI,
5370               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5371 
5372   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5373       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5374   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5375   Address LHSElementCurrent =
5376       Address(LHSElementPHI,
5377               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5378 
5379   // Emit copy.
5380   CodeGenFunction::OMPPrivateScope Scope(CGF);
5381   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5382   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5383   Scope.Privatize();
5384   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5385   Scope.ForceCleanup();
5386 
5387   // Shift the address forward by one element.
5388   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5389       LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
5390       "omp.arraycpy.dest.element");
5391   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5392       RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
5393       "omp.arraycpy.src.element");
5394   // Check whether we've reached the end.
5395   llvm::Value *Done =
5396       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5397   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5398   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5399   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5400 
5401   // Done.
5402   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5403 }
5404 
5405 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5406 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5407 /// UDR combiner function.
5408 static void emitReductionCombiner(CodeGenFunction &CGF,
5409                                   const Expr *ReductionOp) {
5410   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5411     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5412       if (const auto *DRE =
5413               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5414         if (const auto *DRD =
5415                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5416           std::pair<llvm::Function *, llvm::Function *> Reduction =
5417               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5418           RValue Func = RValue::get(Reduction.first);
5419           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5420           CGF.EmitIgnoredExpr(ReductionOp);
5421           return;
5422         }
5423   CGF.EmitIgnoredExpr(ReductionOp);
5424 }
5425 
5426 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5427     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5428     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5429     ArrayRef<const Expr *> ReductionOps) {
5430   ASTContext &C = CGM.getContext();
5431 
5432   // void reduction_func(void *LHSArg, void *RHSArg);
5433   FunctionArgList Args;
5434   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5435                            ImplicitParamDecl::Other);
5436   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5437                            ImplicitParamDecl::Other);
5438   Args.push_back(&LHSArg);
5439   Args.push_back(&RHSArg);
5440   const auto &CGFI =
5441       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5442   std::string Name = getName({"omp", "reduction", "reduction_func"});
5443   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5444                                     llvm::GlobalValue::InternalLinkage, Name,
5445                                     &CGM.getModule());
5446   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5447   Fn->setDoesNotRecurse();
5448   CodeGenFunction CGF(CGM);
5449   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5450 
5451   // Dst = (void*[n])(LHSArg);
5452   // Src = (void*[n])(RHSArg);
5453   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5454       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5455       ArgsType), CGF.getPointerAlign());
5456   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5457       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5458       ArgsType), CGF.getPointerAlign());
5459 
5460   //  ...
5461   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5462   //  ...
5463   CodeGenFunction::OMPPrivateScope Scope(CGF);
5464   auto IPriv = Privates.begin();
5465   unsigned Idx = 0;
5466   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5467     const auto *RHSVar =
5468         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5469     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5470       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5471     });
5472     const auto *LHSVar =
5473         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5474     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5475       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5476     });
5477     QualType PrivTy = (*IPriv)->getType();
5478     if (PrivTy->isVariablyModifiedType()) {
5479       // Get array size and emit VLA type.
5480       ++Idx;
5481       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5482       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5483       const VariableArrayType *VLA =
5484           CGF.getContext().getAsVariableArrayType(PrivTy);
5485       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5486       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5487           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5488       CGF.EmitVariablyModifiedType(PrivTy);
5489     }
5490   }
5491   Scope.Privatize();
5492   IPriv = Privates.begin();
5493   auto ILHS = LHSExprs.begin();
5494   auto IRHS = RHSExprs.begin();
5495   for (const Expr *E : ReductionOps) {
5496     if ((*IPriv)->getType()->isArrayType()) {
5497       // Emit reduction for array section.
5498       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5499       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5500       EmitOMPAggregateReduction(
5501           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5502           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5503             emitReductionCombiner(CGF, E);
5504           });
5505     } else {
5506       // Emit reduction for array subscript or single variable.
5507       emitReductionCombiner(CGF, E);
5508     }
5509     ++IPriv;
5510     ++ILHS;
5511     ++IRHS;
5512   }
5513   Scope.ForceCleanup();
5514   CGF.FinishFunction();
5515   return Fn;
5516 }
5517 
5518 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5519                                                   const Expr *ReductionOp,
5520                                                   const Expr *PrivateRef,
5521                                                   const DeclRefExpr *LHS,
5522                                                   const DeclRefExpr *RHS) {
5523   if (PrivateRef->getType()->isArrayType()) {
5524     // Emit reduction for array section.
5525     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5526     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5527     EmitOMPAggregateReduction(
5528         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5529         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5530           emitReductionCombiner(CGF, ReductionOp);
5531         });
5532   } else {
5533     // Emit reduction for array subscript or single variable.
5534     emitReductionCombiner(CGF, ReductionOp);
5535   }
5536 }
5537 
5538 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5539                                     ArrayRef<const Expr *> Privates,
5540                                     ArrayRef<const Expr *> LHSExprs,
5541                                     ArrayRef<const Expr *> RHSExprs,
5542                                     ArrayRef<const Expr *> ReductionOps,
5543                                     ReductionOptionsTy Options) {
5544   if (!CGF.HaveInsertPoint())
5545     return;
5546 
5547   bool WithNowait = Options.WithNowait;
5548   bool SimpleReduction = Options.SimpleReduction;
5549 
5550   // Next code should be emitted for reduction:
5551   //
5552   // static kmp_critical_name lock = { 0 };
5553   //
5554   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5555   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5556   //  ...
5557   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5558   //  *(Type<n>-1*)rhs[<n>-1]);
5559   // }
5560   //
5561   // ...
5562   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5563   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5564   // RedList, reduce_func, &<lock>)) {
5565   // case 1:
5566   //  ...
5567   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5568   //  ...
5569   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5570   // break;
5571   // case 2:
5572   //  ...
5573   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5574   //  ...
5575   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5576   // break;
5577   // default:;
5578   // }
5579   //
5580   // if SimpleReduction is true, only the next code is generated:
5581   //  ...
5582   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5583   //  ...
5584 
5585   ASTContext &C = CGM.getContext();
5586 
5587   if (SimpleReduction) {
5588     CodeGenFunction::RunCleanupsScope Scope(CGF);
5589     auto IPriv = Privates.begin();
5590     auto ILHS = LHSExprs.begin();
5591     auto IRHS = RHSExprs.begin();
5592     for (const Expr *E : ReductionOps) {
5593       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5594                                   cast<DeclRefExpr>(*IRHS));
5595       ++IPriv;
5596       ++ILHS;
5597       ++IRHS;
5598     }
5599     return;
5600   }
5601 
5602   // 1. Build a list of reduction variables.
5603   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5604   auto Size = RHSExprs.size();
5605   for (const Expr *E : Privates) {
5606     if (E->getType()->isVariablyModifiedType())
5607       // Reserve place for array size.
5608       ++Size;
5609   }
5610   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5611   QualType ReductionArrayTy =
5612       C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5613                              /*IndexTypeQuals=*/0);
5614   Address ReductionList =
5615       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5616   auto IPriv = Privates.begin();
5617   unsigned Idx = 0;
5618   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5619     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5620     CGF.Builder.CreateStore(
5621         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5622             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5623         Elem);
5624     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5625       // Store array size.
5626       ++Idx;
5627       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5628       llvm::Value *Size = CGF.Builder.CreateIntCast(
5629           CGF.getVLASize(
5630                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5631               .NumElts,
5632           CGF.SizeTy, /*isSigned=*/false);
5633       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5634                               Elem);
5635     }
5636   }
5637 
5638   // 2. Emit reduce_func().
5639   llvm::Function *ReductionFn = emitReductionFunction(
5640       Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5641       LHSExprs, RHSExprs, ReductionOps);
5642 
5643   // 3. Create static kmp_critical_name lock = { 0 };
5644   std::string Name = getName({"reduction"});
5645   llvm::Value *Lock = getCriticalRegionLock(Name);
5646 
5647   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5648   // RedList, reduce_func, &<lock>);
5649   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5650   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5651   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5652   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5653       ReductionList.getPointer(), CGF.VoidPtrTy);
5654   llvm::Value *Args[] = {
5655       IdentTLoc,                             // ident_t *<loc>
5656       ThreadId,                              // i32 <gtid>
5657       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5658       ReductionArrayTySize,                  // size_type sizeof(RedList)
5659       RL,                                    // void *RedList
5660       ReductionFn, // void (*) (void *, void *) <reduce_func>
5661       Lock         // kmp_critical_name *&<lock>
5662   };
5663   llvm::Value *Res = CGF.EmitRuntimeCall(
5664       OMPBuilder.getOrCreateRuntimeFunction(
5665           CGM.getModule(),
5666           WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5667       Args);
5668 
5669   // 5. Build switch(res)
5670   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5671   llvm::SwitchInst *SwInst =
5672       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5673 
5674   // 6. Build case 1:
5675   //  ...
5676   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5677   //  ...
5678   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5679   // break;
5680   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5681   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5682   CGF.EmitBlock(Case1BB);
5683 
5684   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5685   llvm::Value *EndArgs[] = {
5686       IdentTLoc, // ident_t *<loc>
5687       ThreadId,  // i32 <gtid>
5688       Lock       // kmp_critical_name *&<lock>
5689   };
5690   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5691                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5692     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5693     auto IPriv = Privates.begin();
5694     auto ILHS = LHSExprs.begin();
5695     auto IRHS = RHSExprs.begin();
5696     for (const Expr *E : ReductionOps) {
5697       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5698                                      cast<DeclRefExpr>(*IRHS));
5699       ++IPriv;
5700       ++ILHS;
5701       ++IRHS;
5702     }
5703   };
5704   RegionCodeGenTy RCG(CodeGen);
5705   CommonActionTy Action(
5706       nullptr, llvm::None,
5707       OMPBuilder.getOrCreateRuntimeFunction(
5708           CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5709                                       : OMPRTL___kmpc_end_reduce),
5710       EndArgs);
5711   RCG.setAction(Action);
5712   RCG(CGF);
5713 
5714   CGF.EmitBranch(DefaultBB);
5715 
5716   // 7. Build case 2:
5717   //  ...
5718   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5719   //  ...
5720   // break;
5721   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5722   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5723   CGF.EmitBlock(Case2BB);
5724 
5725   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5726                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5727     auto ILHS = LHSExprs.begin();
5728     auto IRHS = RHSExprs.begin();
5729     auto IPriv = Privates.begin();
5730     for (const Expr *E : ReductionOps) {
5731       const Expr *XExpr = nullptr;
5732       const Expr *EExpr = nullptr;
5733       const Expr *UpExpr = nullptr;
5734       BinaryOperatorKind BO = BO_Comma;
5735       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5736         if (BO->getOpcode() == BO_Assign) {
5737           XExpr = BO->getLHS();
5738           UpExpr = BO->getRHS();
5739         }
5740       }
5741       // Try to emit update expression as a simple atomic.
5742       const Expr *RHSExpr = UpExpr;
5743       if (RHSExpr) {
5744         // Analyze RHS part of the whole expression.
5745         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5746                 RHSExpr->IgnoreParenImpCasts())) {
5747           // If this is a conditional operator, analyze its condition for
5748           // min/max reduction operator.
5749           RHSExpr = ACO->getCond();
5750         }
5751         if (const auto *BORHS =
5752                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5753           EExpr = BORHS->getRHS();
5754           BO = BORHS->getOpcode();
5755         }
5756       }
5757       if (XExpr) {
5758         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5759         auto &&AtomicRedGen = [BO, VD,
5760                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5761                                     const Expr *EExpr, const Expr *UpExpr) {
5762           LValue X = CGF.EmitLValue(XExpr);
5763           RValue E;
5764           if (EExpr)
5765             E = CGF.EmitAnyExpr(EExpr);
5766           CGF.EmitOMPAtomicSimpleUpdateExpr(
5767               X, E, BO, /*IsXLHSInRHSPart=*/true,
5768               llvm::AtomicOrdering::Monotonic, Loc,
5769               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5770                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5771                 PrivateScope.addPrivate(
5772                     VD, [&CGF, VD, XRValue, Loc]() {
5773                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5774                       CGF.emitOMPSimpleStore(
5775                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5776                           VD->getType().getNonReferenceType(), Loc);
5777                       return LHSTemp;
5778                     });
5779                 (void)PrivateScope.Privatize();
5780                 return CGF.EmitAnyExpr(UpExpr);
5781               });
5782         };
5783         if ((*IPriv)->getType()->isArrayType()) {
5784           // Emit atomic reduction for array section.
5785           const auto *RHSVar =
5786               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5787           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5788                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5789         } else {
5790           // Emit atomic reduction for array subscript or single variable.
5791           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5792         }
5793       } else {
5794         // Emit as a critical region.
5795         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5796                                            const Expr *, const Expr *) {
5797           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5798           std::string Name = RT.getName({"atomic_reduction"});
5799           RT.emitCriticalRegion(
5800               CGF, Name,
5801               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5802                 Action.Enter(CGF);
5803                 emitReductionCombiner(CGF, E);
5804               },
5805               Loc);
5806         };
5807         if ((*IPriv)->getType()->isArrayType()) {
5808           const auto *LHSVar =
5809               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5810           const auto *RHSVar =
5811               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5812           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5813                                     CritRedGen);
5814         } else {
5815           CritRedGen(CGF, nullptr, nullptr, nullptr);
5816         }
5817       }
5818       ++ILHS;
5819       ++IRHS;
5820       ++IPriv;
5821     }
5822   };
5823   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5824   if (!WithNowait) {
5825     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5826     llvm::Value *EndArgs[] = {
5827         IdentTLoc, // ident_t *<loc>
5828         ThreadId,  // i32 <gtid>
5829         Lock       // kmp_critical_name *&<lock>
5830     };
5831     CommonActionTy Action(nullptr, llvm::None,
5832                           OMPBuilder.getOrCreateRuntimeFunction(
5833                               CGM.getModule(), OMPRTL___kmpc_end_reduce),
5834                           EndArgs);
5835     AtomicRCG.setAction(Action);
5836     AtomicRCG(CGF);
5837   } else {
5838     AtomicRCG(CGF);
5839   }
5840 
5841   CGF.EmitBranch(DefaultBB);
5842   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5843 }
5844 
5845 /// Generates unique name for artificial threadprivate variables.
5846 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5847 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5848                                       const Expr *Ref) {
5849   SmallString<256> Buffer;
5850   llvm::raw_svector_ostream Out(Buffer);
5851   const clang::DeclRefExpr *DE;
5852   const VarDecl *D = ::getBaseDecl(Ref, DE);
5853   if (!D)
5854     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5855   D = D->getCanonicalDecl();
5856   std::string Name = CGM.getOpenMPRuntime().getName(
5857       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5858   Out << Prefix << Name << "_"
5859       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5860   return std::string(Out.str());
5861 }
5862 
5863 /// Emits reduction initializer function:
5864 /// \code
5865 /// void @.red_init(void* %arg, void* %orig) {
5866 /// %0 = bitcast void* %arg to <type>*
5867 /// store <type> <init>, <type>* %0
5868 /// ret void
5869 /// }
5870 /// \endcode
5871 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5872                                            SourceLocation Loc,
5873                                            ReductionCodeGen &RCG, unsigned N) {
5874   ASTContext &C = CGM.getContext();
5875   QualType VoidPtrTy = C.VoidPtrTy;
5876   VoidPtrTy.addRestrict();
5877   FunctionArgList Args;
5878   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5879                           ImplicitParamDecl::Other);
5880   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5881                               ImplicitParamDecl::Other);
5882   Args.emplace_back(&Param);
5883   Args.emplace_back(&ParamOrig);
5884   const auto &FnInfo =
5885       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5886   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5887   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5888   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5889                                     Name, &CGM.getModule());
5890   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5891   Fn->setDoesNotRecurse();
5892   CodeGenFunction CGF(CGM);
5893   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5894   Address PrivateAddr = CGF.EmitLoadOfPointer(
5895       CGF.GetAddrOfLocalVar(&Param),
5896       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5897   llvm::Value *Size = nullptr;
5898   // If the size of the reduction item is non-constant, load it from global
5899   // threadprivate variable.
5900   if (RCG.getSizes(N).second) {
5901     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5902         CGF, CGM.getContext().getSizeType(),
5903         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5904     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5905                                 CGM.getContext().getSizeType(), Loc);
5906   }
5907   RCG.emitAggregateType(CGF, N, Size);
5908   LValue OrigLVal;
5909   // If initializer uses initializer from declare reduction construct, emit a
5910   // pointer to the address of the original reduction item (reuired by reduction
5911   // initializer)
5912   if (RCG.usesReductionInitializer(N)) {
5913     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5914     SharedAddr = CGF.EmitLoadOfPointer(
5915         SharedAddr,
5916         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5917     OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5918   } else {
5919     OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5920         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5921         CGM.getContext().VoidPtrTy);
5922   }
5923   // Emit the initializer:
5924   // %0 = bitcast void* %arg to <type>*
5925   // store <type> <init>, <type>* %0
5926   RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5927                          [](CodeGenFunction &) { return false; });
5928   CGF.FinishFunction();
5929   return Fn;
5930 }
5931 
5932 /// Emits reduction combiner function:
5933 /// \code
5934 /// void @.red_comb(void* %arg0, void* %arg1) {
5935 /// %lhs = bitcast void* %arg0 to <type>*
5936 /// %rhs = bitcast void* %arg1 to <type>*
5937 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5938 /// store <type> %2, <type>* %lhs
5939 /// ret void
5940 /// }
5941 /// \endcode
5942 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5943                                            SourceLocation Loc,
5944                                            ReductionCodeGen &RCG, unsigned N,
5945                                            const Expr *ReductionOp,
5946                                            const Expr *LHS, const Expr *RHS,
5947                                            const Expr *PrivateRef) {
5948   ASTContext &C = CGM.getContext();
5949   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5950   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5951   FunctionArgList Args;
5952   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5953                                C.VoidPtrTy, ImplicitParamDecl::Other);
5954   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5955                             ImplicitParamDecl::Other);
5956   Args.emplace_back(&ParamInOut);
5957   Args.emplace_back(&ParamIn);
5958   const auto &FnInfo =
5959       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5960   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5961   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5962   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5963                                     Name, &CGM.getModule());
5964   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5965   Fn->setDoesNotRecurse();
5966   CodeGenFunction CGF(CGM);
5967   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5968   llvm::Value *Size = nullptr;
5969   // If the size of the reduction item is non-constant, load it from global
5970   // threadprivate variable.
5971   if (RCG.getSizes(N).second) {
5972     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5973         CGF, CGM.getContext().getSizeType(),
5974         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5975     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5976                                 CGM.getContext().getSizeType(), Loc);
5977   }
5978   RCG.emitAggregateType(CGF, N, Size);
5979   // Remap lhs and rhs variables to the addresses of the function arguments.
5980   // %lhs = bitcast void* %arg0 to <type>*
5981   // %rhs = bitcast void* %arg1 to <type>*
5982   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5983   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
5984     // Pull out the pointer to the variable.
5985     Address PtrAddr = CGF.EmitLoadOfPointer(
5986         CGF.GetAddrOfLocalVar(&ParamInOut),
5987         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5988     return CGF.Builder.CreateElementBitCast(
5989         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5990   });
5991   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
5992     // Pull out the pointer to the variable.
5993     Address PtrAddr = CGF.EmitLoadOfPointer(
5994         CGF.GetAddrOfLocalVar(&ParamIn),
5995         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5996     return CGF.Builder.CreateElementBitCast(
5997         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
5998   });
5999   PrivateScope.Privatize();
6000   // Emit the combiner body:
6001   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6002   // store <type> %2, <type>* %lhs
6003   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6004       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6005       cast<DeclRefExpr>(RHS));
6006   CGF.FinishFunction();
6007   return Fn;
6008 }
6009 
6010 /// Emits reduction finalizer function:
6011 /// \code
6012 /// void @.red_fini(void* %arg) {
6013 /// %0 = bitcast void* %arg to <type>*
6014 /// <destroy>(<type>* %0)
6015 /// ret void
6016 /// }
6017 /// \endcode
6018 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6019                                            SourceLocation Loc,
6020                                            ReductionCodeGen &RCG, unsigned N) {
6021   if (!RCG.needCleanups(N))
6022     return nullptr;
6023   ASTContext &C = CGM.getContext();
6024   FunctionArgList Args;
6025   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6026                           ImplicitParamDecl::Other);
6027   Args.emplace_back(&Param);
6028   const auto &FnInfo =
6029       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6030   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6031   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6032   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6033                                     Name, &CGM.getModule());
6034   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6035   Fn->setDoesNotRecurse();
6036   CodeGenFunction CGF(CGM);
6037   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6038   Address PrivateAddr = CGF.EmitLoadOfPointer(
6039       CGF.GetAddrOfLocalVar(&Param),
6040       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6041   llvm::Value *Size = nullptr;
6042   // If the size of the reduction item is non-constant, load it from global
6043   // threadprivate variable.
6044   if (RCG.getSizes(N).second) {
6045     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6046         CGF, CGM.getContext().getSizeType(),
6047         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6048     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6049                                 CGM.getContext().getSizeType(), Loc);
6050   }
6051   RCG.emitAggregateType(CGF, N, Size);
6052   // Emit the finalizer body:
6053   // <destroy>(<type>* %0)
6054   RCG.emitCleanups(CGF, N, PrivateAddr);
6055   CGF.FinishFunction(Loc);
6056   return Fn;
6057 }
6058 
// Emits the setup code for task reductions: fills a local array of
// kmp_taskred_input_t descriptors (one per entry of Data.ReductionVars) and
// passes it to __kmpc_taskred_modifier_init (when Data.IsReductionWithTaskMod
// is set) or to __kmpc_taskred_init. Returns the result of that runtime call,
// or nullptr if there is no insert point or there are no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  // Fields are added in the order listed in the struct comment above.
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    // A non-null second size is what selects delayed creation.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = <size of the item in chars, as size_t>;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // emitReduceFiniFunction returns nullptr when no cleanups are needed; a
    // null pointer is stored in that case.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6187 
6188 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6189                                             SourceLocation Loc,
6190                                             bool IsWorksharingReduction) {
6191   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6192   // is_ws, int num, void *data);
6193   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6194   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6195                                                 CGM.IntTy, /*isSigned=*/true);
6196   llvm::Value *Args[] = {IdentTLoc, GTid,
6197                          llvm::ConstantInt::get(CGM.IntTy,
6198                                                 IsWorksharingReduction ? 1 : 0,
6199                                                 /*isSigned=*/true)};
6200   (void)CGF.EmitRuntimeCall(
6201       OMPBuilder.getOrCreateRuntimeFunction(
6202           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6203       Args);
6204 }
6205 
6206 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6207                                               SourceLocation Loc,
6208                                               ReductionCodeGen &RCG,
6209                                               unsigned N) {
6210   auto Sizes = RCG.getSizes(N);
6211   // Emit threadprivate global variable if the type is non-constant
6212   // (Sizes.second = nullptr).
6213   if (Sizes.second) {
6214     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6215                                                      /*isSigned=*/false);
6216     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6217         CGF, CGM.getContext().getSizeType(),
6218         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6219     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6220   }
6221 }
6222 
6223 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6224                                               SourceLocation Loc,
6225                                               llvm::Value *ReductionsPtr,
6226                                               LValue SharedLVal) {
6227   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6228   // *d);
6229   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6230                                                    CGM.IntTy,
6231                                                    /*isSigned=*/true),
6232                          ReductionsPtr,
6233                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6234                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6235   return Address(
6236       CGF.EmitRuntimeCall(
6237           OMPBuilder.getOrCreateRuntimeFunction(
6238               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6239           Args),
6240       SharedLVal.getAlignment());
6241 }
6242 
6243 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6244                                        SourceLocation Loc) {
6245   if (!CGF.HaveInsertPoint())
6246     return;
6247 
6248   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6249     OMPBuilder.createTaskwait(CGF.Builder);
6250   } else {
6251     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6252     // global_tid);
6253     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6254     // Ignore return result until untied tasks are supported.
6255     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6256                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6257                         Args);
6258   }
6259 
6260   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6261     Region->emitUntiedSwitch(CGF);
6262 }
6263 
6264 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6265                                            OpenMPDirectiveKind InnerKind,
6266                                            const RegionCodeGenTy &CodeGen,
6267                                            bool HasCancel) {
6268   if (!CGF.HaveInsertPoint())
6269     return;
6270   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6271                                  InnerKind != OMPD_critical &&
6272                                      InnerKind != OMPD_master &&
6273                                      InnerKind != OMPD_masked);
6274   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6275 }
6276 
namespace {
/// Cancellation kinds passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls. The mapping from
/// directive kinds is done by getCancellationKind.
enum RTCancelKind {
  /// Only used as the initial value inside getCancellationKind.
  CancelNoreq = 0,
  /// Cancel region is OMPD_parallel.
  CancelParallel = 1,
  /// Cancel region is OMPD_for.
  CancelLoop = 2,
  /// Cancel region is OMPD_sections.
  CancelSections = 3,
  /// Cancel region is OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
6286 
6287 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6288   RTCancelKind CancelKind = CancelNoreq;
6289   if (CancelRegion == OMPD_parallel)
6290     CancelKind = CancelParallel;
6291   else if (CancelRegion == OMPD_for)
6292     CancelKind = CancelLoop;
6293   else if (CancelRegion == OMPD_sections)
6294     CancelKind = CancelSections;
6295   else {
6296     assert(CancelRegion == OMPD_taskgroup);
6297     CancelKind = CancelTaskgroup;
6298   }
6299   return CancelKind;
6300 }
6301 
6302 void CGOpenMPRuntime::emitCancellationPointCall(
6303     CodeGenFunction &CGF, SourceLocation Loc,
6304     OpenMPDirectiveKind CancelRegion) {
6305   if (!CGF.HaveInsertPoint())
6306     return;
6307   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6308   // global_tid, kmp_int32 cncl_kind);
6309   if (auto *OMPRegionInfo =
6310           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6311     // For 'cancellation point taskgroup', the task region info may not have a
6312     // cancel. This may instead happen in another adjacent task.
6313     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6314       llvm::Value *Args[] = {
6315           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6316           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6317       // Ignore return result until untied tasks are supported.
6318       llvm::Value *Result = CGF.EmitRuntimeCall(
6319           OMPBuilder.getOrCreateRuntimeFunction(
6320               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6321           Args);
6322       // if (__kmpc_cancellationpoint()) {
6323       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6324       //   exit from construct;
6325       // }
6326       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6327       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6328       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6329       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6330       CGF.EmitBlock(ExitBB);
6331       if (CancelRegion == OMPD_parallel)
6332         emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6333       // exit from construct;
6334       CodeGenFunction::JumpDest CancelDest =
6335           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6336       CGF.EmitBranchThroughCleanup(CancelDest);
6337       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6338     }
6339   }
6340 }
6341 
6342 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6343                                      const Expr *IfCond,
6344                                      OpenMPDirectiveKind CancelRegion) {
6345   if (!CGF.HaveInsertPoint())
6346     return;
6347   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6348   // kmp_int32 cncl_kind);
6349   auto &M = CGM.getModule();
6350   if (auto *OMPRegionInfo =
6351           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6352     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6353                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6354       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6355       llvm::Value *Args[] = {
6356           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6357           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6358       // Ignore return result until untied tasks are supported.
6359       llvm::Value *Result = CGF.EmitRuntimeCall(
6360           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6361       // if (__kmpc_cancel()) {
6362       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6363       //   exit from construct;
6364       // }
6365       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6366       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6367       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6368       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6369       CGF.EmitBlock(ExitBB);
6370       if (CancelRegion == OMPD_parallel)
6371         RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6372       // exit from construct;
6373       CodeGenFunction::JumpDest CancelDest =
6374           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6375       CGF.EmitBranchThroughCleanup(CancelDest);
6376       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6377     };
6378     if (IfCond) {
6379       emitIfClause(CGF, IfCond, ThenGen,
6380                    [](CodeGenFunction &, PrePostActionTy &) {});
6381     } else {
6382       RegionCodeGenTy ThenRCG(ThenGen);
6383       ThenRCG(CGF);
6384     }
6385   }
6386 }
6387 
6388 namespace {
6389 /// Cleanup action for uses_allocators support.
6390 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6391   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6392 
6393 public:
6394   OMPUsesAllocatorsActionTy(
6395       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6396       : Allocators(Allocators) {}
6397   void Enter(CodeGenFunction &CGF) override {
6398     if (!CGF.HaveInsertPoint())
6399       return;
6400     for (const auto &AllocatorData : Allocators) {
6401       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6402           CGF, AllocatorData.first, AllocatorData.second);
6403     }
6404   }
6405   void Exit(CodeGenFunction &CGF) override {
6406     if (!CGF.HaveInsertPoint())
6407       return;
6408     for (const auto &AllocatorData : Allocators) {
6409       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6410                                                         AllocatorData.first);
6411     }
6412   }
6413 };
6414 } // namespace
6415 
6416 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6417     const OMPExecutableDirective &D, StringRef ParentName,
6418     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6419     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6420   assert(!ParentName.empty() && "Invalid target region parent name!");
6421   HasEmittedTargetRegion = true;
6422   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6423   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6424     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6425       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6426       if (!D.AllocatorTraits)
6427         continue;
6428       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6429     }
6430   }
6431   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6432   CodeGen.setAction(UsesAllocatorAction);
6433   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6434                                    IsOffloadEntry, CodeGen);
6435 }
6436 
// Emits the initialization of one uses_allocators entry: calls
// __kmpc_init_allocator with the traits of \p AllocatorTraits, then emits the
// variable referenced by \p Allocator and stores the call result into it.
// \p Allocator must be (after ignoring parens/implicit casts) a DeclRefExpr to
// a VarDecl, and \p AllocatorTraits must have a constant array type; both are
// enforced by the cast<> calls below.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The number of traits is the element count of the traits array.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  // Re-type the address of the traits lvalue as 'void **' and load a 'void *'
  // through it; that loaded value is what gets passed as the traits argument.
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  // Call __kmpc_init_allocator(thread id, memspace, number of traits, traits).
  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // The allocator variable is emitted first so that the lvalue below can be
  // formed for it.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  // Convert the 'void *' result to the allocator's own type before storing.
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6471 
6472 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6473                                              const Expr *Allocator) {
6474   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6475   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6476   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6477   llvm::Value *AllocatorVal =
6478       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6479   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6480                                           CGF.getContext().VoidPtrTy,
6481                                           Allocator->getExprLoc());
6482   (void)CGF.EmitRuntimeCall(
6483       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6484                                             OMPRTL___kmpc_destroy_allocator),
6485       {ThreadId, AllocatorVal});
6486 }
6487 
// Generates the outlined function for the target region of \p D and returns it
// through \p OutlinedFn. When \p IsOffloadEntry is set, it additionally
// produces the region ID (\p OutlinedFnID), registers the entry with the
// offload entries manager and attaches the default num_teams/thread_limit
// values as function attributes when they are positive.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    // Device and file IDs are printed in hexadecimal, the line in decimal.
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // The outlined function is generated with a separate CodeGenFunction whose
  // captured-statement info is the target region info named after the entry.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device: the ID is the function address; the function gets weak linkage,
    // is not DSO-local, and uses the AMDGPU kernel calling convention on
    // AMDGCN targets.
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host: the ID is a dedicated weak, constant, zero-initialized i8 global.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  // Only positive default values are recorded; the returned expressions are
  // not needed here.
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }
}
6570 
6571 /// Checks if the expression is constant or does not have non-trivial function
6572 /// calls.
6573 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6574   // We can skip constant expressions.
6575   // We can skip expressions with trivial calls or simple expressions.
6576   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6577           !E->hasNonTrivialCall(Ctx)) &&
6578          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6579 }
6580 
6581 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6582                                                     const Stmt *Body) {
6583   const Stmt *Child = Body->IgnoreContainers();
6584   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6585     Child = nullptr;
6586     for (const Stmt *S : C->body()) {
6587       if (const auto *E = dyn_cast<Expr>(S)) {
6588         if (isTrivial(Ctx, E))
6589           continue;
6590       }
6591       // Some of the statements can be ignored.
6592       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6593           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6594         continue;
6595       // Analyze declarations.
6596       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6597         if (llvm::all_of(DS->decls(), [](const Decl *D) {
6598               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6599                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6600                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6601                   isa<UsingDirectiveDecl>(D) ||
6602                   isa<OMPDeclareReductionDecl>(D) ||
6603                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6604                 return true;
6605               const auto *VD = dyn_cast<VarDecl>(D);
6606               if (!VD)
6607                 return false;
6608               return VD->hasGlobalStorage() || !VD->isUsed();
6609             }))
6610           continue;
6611       }
6612       // Found multiple children - cannot get the one child only.
6613       if (Child)
6614         return nullptr;
6615       Child = S;
6616     }
6617     if (Child)
6618       Child = Child->IgnoreContainers();
6619   }
6620   return Child;
6621 }
6622 
6623 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6624     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6625     int32_t &DefaultVal) {
6626 
6627   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6628   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6629          "Expected target-based executable directive.");
6630   switch (DirectiveKind) {
6631   case OMPD_target: {
6632     const auto *CS = D.getInnermostCapturedStmt();
6633     const auto *Body =
6634         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6635     const Stmt *ChildStmt =
6636         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6637     if (const auto *NestedDir =
6638             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6639       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6640         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6641           const Expr *NumTeams =
6642               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6643           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6644             if (auto Constant =
6645                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6646               DefaultVal = Constant->getExtValue();
6647           return NumTeams;
6648         }
6649         DefaultVal = 0;
6650         return nullptr;
6651       }
6652       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6653           isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6654         DefaultVal = 1;
6655         return nullptr;
6656       }
6657       DefaultVal = 1;
6658       return nullptr;
6659     }
6660     // A value of -1 is used to check if we need to emit no teams region
6661     DefaultVal = -1;
6662     return nullptr;
6663   }
6664   case OMPD_target_teams:
6665   case OMPD_target_teams_distribute:
6666   case OMPD_target_teams_distribute_simd:
6667   case OMPD_target_teams_distribute_parallel_for:
6668   case OMPD_target_teams_distribute_parallel_for_simd: {
6669     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6670       const Expr *NumTeams =
6671           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6672       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6673         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6674           DefaultVal = Constant->getExtValue();
6675       return NumTeams;
6676     }
6677     DefaultVal = 0;
6678     return nullptr;
6679   }
6680   case OMPD_target_parallel:
6681   case OMPD_target_parallel_for:
6682   case OMPD_target_parallel_for_simd:
6683   case OMPD_target_simd:
6684     DefaultVal = 1;
6685     return nullptr;
6686   case OMPD_parallel:
6687   case OMPD_for:
6688   case OMPD_parallel_for:
6689   case OMPD_parallel_master:
6690   case OMPD_parallel_sections:
6691   case OMPD_for_simd:
6692   case OMPD_parallel_for_simd:
6693   case OMPD_cancel:
6694   case OMPD_cancellation_point:
6695   case OMPD_ordered:
6696   case OMPD_threadprivate:
6697   case OMPD_allocate:
6698   case OMPD_task:
6699   case OMPD_simd:
6700   case OMPD_tile:
6701   case OMPD_unroll:
6702   case OMPD_sections:
6703   case OMPD_section:
6704   case OMPD_single:
6705   case OMPD_master:
6706   case OMPD_critical:
6707   case OMPD_taskyield:
6708   case OMPD_barrier:
6709   case OMPD_taskwait:
6710   case OMPD_taskgroup:
6711   case OMPD_atomic:
6712   case OMPD_flush:
6713   case OMPD_depobj:
6714   case OMPD_scan:
6715   case OMPD_teams:
6716   case OMPD_target_data:
6717   case OMPD_target_exit_data:
6718   case OMPD_target_enter_data:
6719   case OMPD_distribute:
6720   case OMPD_distribute_simd:
6721   case OMPD_distribute_parallel_for:
6722   case OMPD_distribute_parallel_for_simd:
6723   case OMPD_teams_distribute:
6724   case OMPD_teams_distribute_simd:
6725   case OMPD_teams_distribute_parallel_for:
6726   case OMPD_teams_distribute_parallel_for_simd:
6727   case OMPD_target_update:
6728   case OMPD_declare_simd:
6729   case OMPD_declare_variant:
6730   case OMPD_begin_declare_variant:
6731   case OMPD_end_declare_variant:
6732   case OMPD_declare_target:
6733   case OMPD_end_declare_target:
6734   case OMPD_declare_reduction:
6735   case OMPD_declare_mapper:
6736   case OMPD_taskloop:
6737   case OMPD_taskloop_simd:
6738   case OMPD_master_taskloop:
6739   case OMPD_master_taskloop_simd:
6740   case OMPD_parallel_master_taskloop:
6741   case OMPD_parallel_master_taskloop_simd:
6742   case OMPD_requires:
6743   case OMPD_unknown:
6744     break;
6745   default:
6746     break;
6747   }
6748   llvm_unreachable("Unexpected directive kind.");
6749 }
6750 
6751 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6752     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6753   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6754          "Clauses associated with the teams directive expected to be emitted "
6755          "only for the host!");
6756   CGBuilderTy &Bld = CGF.Builder;
6757   int32_t DefaultNT = -1;
6758   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6759   if (NumTeams != nullptr) {
6760     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6761 
6762     switch (DirectiveKind) {
6763     case OMPD_target: {
6764       const auto *CS = D.getInnermostCapturedStmt();
6765       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6766       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6767       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6768                                                   /*IgnoreResultAssign*/ true);
6769       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6770                              /*isSigned=*/true);
6771     }
6772     case OMPD_target_teams:
6773     case OMPD_target_teams_distribute:
6774     case OMPD_target_teams_distribute_simd:
6775     case OMPD_target_teams_distribute_parallel_for:
6776     case OMPD_target_teams_distribute_parallel_for_simd: {
6777       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6778       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6779                                                   /*IgnoreResultAssign*/ true);
6780       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6781                              /*isSigned=*/true);
6782     }
6783     default:
6784       break;
6785     }
6786   } else if (DefaultNT == -1) {
6787     return nullptr;
6788   }
6789 
6790   return Bld.getInt32(DefaultNT);
6791 }
6792 
6793 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6794                                   llvm::Value *DefaultThreadLimitVal) {
6795   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6796       CGF.getContext(), CS->getCapturedStmt());
6797   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6798     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6799       llvm::Value *NumThreads = nullptr;
6800       llvm::Value *CondVal = nullptr;
6801       // Handle if clause. If if clause present, the number of threads is
6802       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6803       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6804         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6805         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6806         const OMPIfClause *IfClause = nullptr;
6807         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6808           if (C->getNameModifier() == OMPD_unknown ||
6809               C->getNameModifier() == OMPD_parallel) {
6810             IfClause = C;
6811             break;
6812           }
6813         }
6814         if (IfClause) {
6815           const Expr *Cond = IfClause->getCondition();
6816           bool Result;
6817           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6818             if (!Result)
6819               return CGF.Builder.getInt32(1);
6820           } else {
6821             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6822             if (const auto *PreInit =
6823                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6824               for (const auto *I : PreInit->decls()) {
6825                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6826                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6827                 } else {
6828                   CodeGenFunction::AutoVarEmission Emission =
6829                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6830                   CGF.EmitAutoVarCleanups(Emission);
6831                 }
6832               }
6833             }
6834             CondVal = CGF.EvaluateExprAsBool(Cond);
6835           }
6836         }
6837       }
6838       // Check the value of num_threads clause iff if clause was not specified
6839       // or is not evaluated to false.
6840       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6841         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6842         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6843         const auto *NumThreadsClause =
6844             Dir->getSingleClause<OMPNumThreadsClause>();
6845         CodeGenFunction::LexicalScope Scope(
6846             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6847         if (const auto *PreInit =
6848                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6849           for (const auto *I : PreInit->decls()) {
6850             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6851               CGF.EmitVarDecl(cast<VarDecl>(*I));
6852             } else {
6853               CodeGenFunction::AutoVarEmission Emission =
6854                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6855               CGF.EmitAutoVarCleanups(Emission);
6856             }
6857           }
6858         }
6859         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6860         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6861                                                /*isSigned=*/false);
6862         if (DefaultThreadLimitVal)
6863           NumThreads = CGF.Builder.CreateSelect(
6864               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6865               DefaultThreadLimitVal, NumThreads);
6866       } else {
6867         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6868                                            : CGF.Builder.getInt32(0);
6869       }
6870       // Process condition of the if clause.
6871       if (CondVal) {
6872         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6873                                               CGF.Builder.getInt32(1));
6874       }
6875       return NumThreads;
6876     }
6877     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6878       return CGF.Builder.getInt32(1);
6879     return DefaultThreadLimitVal;
6880   }
6881   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6882                                : CGF.Builder.getInt32(0);
6883 }
6884 
6885 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6886     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6887     int32_t &DefaultVal) {
6888   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6889   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6890          "Expected target-based executable directive.");
6891 
6892   switch (DirectiveKind) {
6893   case OMPD_target:
6894     // Teams have no clause thread_limit
6895     return nullptr;
6896   case OMPD_target_teams:
6897   case OMPD_target_teams_distribute:
6898     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6899       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6900       const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6901       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6902         if (auto Constant =
6903                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6904           DefaultVal = Constant->getExtValue();
6905       return ThreadLimit;
6906     }
6907     return nullptr;
6908   case OMPD_target_parallel:
6909   case OMPD_target_parallel_for:
6910   case OMPD_target_parallel_for_simd:
6911   case OMPD_target_teams_distribute_parallel_for:
6912   case OMPD_target_teams_distribute_parallel_for_simd: {
6913     Expr *ThreadLimit = nullptr;
6914     Expr *NumThreads = nullptr;
6915     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6916       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6917       ThreadLimit = ThreadLimitClause->getThreadLimit();
6918       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6919         if (auto Constant =
6920                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6921           DefaultVal = Constant->getExtValue();
6922     }
6923     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6924       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6925       NumThreads = NumThreadsClause->getNumThreads();
6926       if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6927         if (auto Constant =
6928                 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6929           if (Constant->getExtValue() < DefaultVal) {
6930             DefaultVal = Constant->getExtValue();
6931             ThreadLimit = NumThreads;
6932           }
6933         }
6934       }
6935     }
6936     return ThreadLimit;
6937   }
6938   case OMPD_target_teams_distribute_simd:
6939   case OMPD_target_simd:
6940     DefaultVal = 1;
6941     return nullptr;
6942   case OMPD_parallel:
6943   case OMPD_for:
6944   case OMPD_parallel_for:
6945   case OMPD_parallel_master:
6946   case OMPD_parallel_sections:
6947   case OMPD_for_simd:
6948   case OMPD_parallel_for_simd:
6949   case OMPD_cancel:
6950   case OMPD_cancellation_point:
6951   case OMPD_ordered:
6952   case OMPD_threadprivate:
6953   case OMPD_allocate:
6954   case OMPD_task:
6955   case OMPD_simd:
6956   case OMPD_tile:
6957   case OMPD_unroll:
6958   case OMPD_sections:
6959   case OMPD_section:
6960   case OMPD_single:
6961   case OMPD_master:
6962   case OMPD_critical:
6963   case OMPD_taskyield:
6964   case OMPD_barrier:
6965   case OMPD_taskwait:
6966   case OMPD_taskgroup:
6967   case OMPD_atomic:
6968   case OMPD_flush:
6969   case OMPD_depobj:
6970   case OMPD_scan:
6971   case OMPD_teams:
6972   case OMPD_target_data:
6973   case OMPD_target_exit_data:
6974   case OMPD_target_enter_data:
6975   case OMPD_distribute:
6976   case OMPD_distribute_simd:
6977   case OMPD_distribute_parallel_for:
6978   case OMPD_distribute_parallel_for_simd:
6979   case OMPD_teams_distribute:
6980   case OMPD_teams_distribute_simd:
6981   case OMPD_teams_distribute_parallel_for:
6982   case OMPD_teams_distribute_parallel_for_simd:
6983   case OMPD_target_update:
6984   case OMPD_declare_simd:
6985   case OMPD_declare_variant:
6986   case OMPD_begin_declare_variant:
6987   case OMPD_end_declare_variant:
6988   case OMPD_declare_target:
6989   case OMPD_end_declare_target:
6990   case OMPD_declare_reduction:
6991   case OMPD_declare_mapper:
6992   case OMPD_taskloop:
6993   case OMPD_taskloop_simd:
6994   case OMPD_master_taskloop:
6995   case OMPD_master_taskloop_simd:
6996   case OMPD_parallel_master_taskloop:
6997   case OMPD_parallel_master_taskloop_simd:
6998   case OMPD_requires:
6999   case OMPD_unknown:
7000     break;
7001   default:
7002     break;
7003   }
7004   llvm_unreachable("Unsupported directive kind.");
7005 }
7006 
7007 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
7008     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
7009   assert(!CGF.getLangOpts().OpenMPIsDevice &&
7010          "Clauses associated with the teams directive expected to be emitted "
7011          "only for the host!");
7012   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
7013   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
7014          "Expected target-based executable directive.");
7015   CGBuilderTy &Bld = CGF.Builder;
7016   llvm::Value *ThreadLimitVal = nullptr;
7017   llvm::Value *NumThreadsVal = nullptr;
7018   switch (DirectiveKind) {
7019   case OMPD_target: {
7020     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7021     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7022       return NumThreads;
7023     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7024         CGF.getContext(), CS->getCapturedStmt());
7025     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7026       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
7027         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
7028         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
7029         const auto *ThreadLimitClause =
7030             Dir->getSingleClause<OMPThreadLimitClause>();
7031         CodeGenFunction::LexicalScope Scope(
7032             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
7033         if (const auto *PreInit =
7034                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
7035           for (const auto *I : PreInit->decls()) {
7036             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
7037               CGF.EmitVarDecl(cast<VarDecl>(*I));
7038             } else {
7039               CodeGenFunction::AutoVarEmission Emission =
7040                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
7041               CGF.EmitAutoVarCleanups(Emission);
7042             }
7043           }
7044         }
7045         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7046             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7047         ThreadLimitVal =
7048             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7049       }
7050       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
7051           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
7052         CS = Dir->getInnermostCapturedStmt();
7053         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7054             CGF.getContext(), CS->getCapturedStmt());
7055         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
7056       }
7057       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
7058           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
7059         CS = Dir->getInnermostCapturedStmt();
7060         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7061           return NumThreads;
7062       }
7063       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
7064         return Bld.getInt32(1);
7065     }
7066     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7067   }
7068   case OMPD_target_teams: {
7069     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7070       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7071       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7072       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7073           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7074       ThreadLimitVal =
7075           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7076     }
7077     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7078     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7079       return NumThreads;
7080     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7081         CGF.getContext(), CS->getCapturedStmt());
7082     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7083       if (Dir->getDirectiveKind() == OMPD_distribute) {
7084         CS = Dir->getInnermostCapturedStmt();
7085         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7086           return NumThreads;
7087       }
7088     }
7089     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7090   }
7091   case OMPD_target_teams_distribute:
7092     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7093       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7094       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7095       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7096           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7097       ThreadLimitVal =
7098           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7099     }
7100     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
7101   case OMPD_target_parallel:
7102   case OMPD_target_parallel_for:
7103   case OMPD_target_parallel_for_simd:
7104   case OMPD_target_teams_distribute_parallel_for:
7105   case OMPD_target_teams_distribute_parallel_for_simd: {
7106     llvm::Value *CondVal = nullptr;
7107     // Handle if clause. If if clause present, the number of threads is
7108     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
7109     if (D.hasClausesOfKind<OMPIfClause>()) {
7110       const OMPIfClause *IfClause = nullptr;
7111       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
7112         if (C->getNameModifier() == OMPD_unknown ||
7113             C->getNameModifier() == OMPD_parallel) {
7114           IfClause = C;
7115           break;
7116         }
7117       }
7118       if (IfClause) {
7119         const Expr *Cond = IfClause->getCondition();
7120         bool Result;
7121         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
7122           if (!Result)
7123             return Bld.getInt32(1);
7124         } else {
7125           CodeGenFunction::RunCleanupsScope Scope(CGF);
7126           CondVal = CGF.EvaluateExprAsBool(Cond);
7127         }
7128       }
7129     }
7130     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7131       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7132       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7133       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7134           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7135       ThreadLimitVal =
7136           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7137     }
7138     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
7139       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
7140       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
7141       llvm::Value *NumThreads = CGF.EmitScalarExpr(
7142           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
7143       NumThreadsVal =
7144           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
7145       ThreadLimitVal = ThreadLimitVal
7146                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
7147                                                                 ThreadLimitVal),
7148                                               NumThreadsVal, ThreadLimitVal)
7149                            : NumThreadsVal;
7150     }
7151     if (!ThreadLimitVal)
7152       ThreadLimitVal = Bld.getInt32(0);
7153     if (CondVal)
7154       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
7155     return ThreadLimitVal;
7156   }
7157   case OMPD_target_teams_distribute_simd:
7158   case OMPD_target_simd:
7159     return Bld.getInt32(1);
7160   case OMPD_parallel:
7161   case OMPD_for:
7162   case OMPD_parallel_for:
7163   case OMPD_parallel_master:
7164   case OMPD_parallel_sections:
7165   case OMPD_for_simd:
7166   case OMPD_parallel_for_simd:
7167   case OMPD_cancel:
7168   case OMPD_cancellation_point:
7169   case OMPD_ordered:
7170   case OMPD_threadprivate:
7171   case OMPD_allocate:
7172   case OMPD_task:
7173   case OMPD_simd:
7174   case OMPD_tile:
7175   case OMPD_unroll:
7176   case OMPD_sections:
7177   case OMPD_section:
7178   case OMPD_single:
7179   case OMPD_master:
7180   case OMPD_critical:
7181   case OMPD_taskyield:
7182   case OMPD_barrier:
7183   case OMPD_taskwait:
7184   case OMPD_taskgroup:
7185   case OMPD_atomic:
7186   case OMPD_flush:
7187   case OMPD_depobj:
7188   case OMPD_scan:
7189   case OMPD_teams:
7190   case OMPD_target_data:
7191   case OMPD_target_exit_data:
7192   case OMPD_target_enter_data:
7193   case OMPD_distribute:
7194   case OMPD_distribute_simd:
7195   case OMPD_distribute_parallel_for:
7196   case OMPD_distribute_parallel_for_simd:
7197   case OMPD_teams_distribute:
7198   case OMPD_teams_distribute_simd:
7199   case OMPD_teams_distribute_parallel_for:
7200   case OMPD_teams_distribute_parallel_for_simd:
7201   case OMPD_target_update:
7202   case OMPD_declare_simd:
7203   case OMPD_declare_variant:
7204   case OMPD_begin_declare_variant:
7205   case OMPD_end_declare_variant:
7206   case OMPD_declare_target:
7207   case OMPD_end_declare_target:
7208   case OMPD_declare_reduction:
7209   case OMPD_declare_mapper:
7210   case OMPD_taskloop:
7211   case OMPD_taskloop_simd:
7212   case OMPD_master_taskloop:
7213   case OMPD_master_taskloop_simd:
7214   case OMPD_parallel_master_taskloop:
7215   case OMPD_parallel_master_taskloop_simd:
7216   case OMPD_requires:
7217   case OMPD_unknown:
7218     break;
7219   default:
7220     break;
7221   }
7222   llvm_unreachable("Unsupported directive kind.");
7223 }
7224 
7225 namespace {
7226 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7227 
7228 // Utility to handle information from clauses associated with a given
7229 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7230 // It provides a convenient interface to obtain the information and generate
7231 // code for that information.
7232 class MappableExprsHandler {
7233 public:
7234   /// Values for bit flags used to specify the mapping type for
7235   /// offloading.
7236   enum OpenMPOffloadMappingFlags : uint64_t {
7237     /// No flags
7238     OMP_MAP_NONE = 0x0,
7239     /// Allocate memory on the device and move data from host to device.
7240     OMP_MAP_TO = 0x01,
7241     /// Allocate memory on the device and move data from device to host.
7242     OMP_MAP_FROM = 0x02,
7243     /// Always perform the requested mapping action on the element, even
7244     /// if it was already mapped before.
7245     OMP_MAP_ALWAYS = 0x04,
7246     /// Delete the element from the device environment, ignoring the
7247     /// current reference count associated with the element.
7248     OMP_MAP_DELETE = 0x08,
7249     /// The element being mapped is a pointer-pointee pair; both the
7250     /// pointer and the pointee should be mapped.
7251     OMP_MAP_PTR_AND_OBJ = 0x10,
7252     /// This flags signals that the base address of an entry should be
7253     /// passed to the target kernel as an argument.
7254     OMP_MAP_TARGET_PARAM = 0x20,
7255     /// Signal that the runtime library has to return the device pointer
7256     /// in the current position for the data being mapped. Used when we have the
7257     /// use_device_ptr or use_device_addr clause.
7258     OMP_MAP_RETURN_PARAM = 0x40,
7259     /// This flag signals that the reference being passed is a pointer to
7260     /// private data.
7261     OMP_MAP_PRIVATE = 0x80,
7262     /// Pass the element to the device by value.
7263     OMP_MAP_LITERAL = 0x100,
7264     /// Implicit map
7265     OMP_MAP_IMPLICIT = 0x200,
7266     /// Close is a hint to the runtime to allocate memory close to
7267     /// the target device.
7268     OMP_MAP_CLOSE = 0x400,
7269     /// 0x800 is reserved for compatibility with XLC.
7270     /// Produce a runtime error if the data is not already allocated.
7271     OMP_MAP_PRESENT = 0x1000,
7272     // Increment and decrement a separate reference counter so that the data
7273     // cannot be unmapped within the associated region.  Thus, this flag is
7274     // intended to be used on 'target' and 'target data' directives because they
7275     // are inherently structured.  It is not intended to be used on 'target
7276     // enter data' and 'target exit data' directives because they are inherently
7277     // dynamic.
7278     // This is an OpenMP extension for the sake of OpenACC support.
7279     OMP_MAP_OMPX_HOLD = 0x2000,
7280     /// Signal that the runtime library should use args as an array of
7281     /// descriptor_dim pointers and use args_size as dims. Used when we have
7282     /// non-contiguous list items in target update directive
7283     OMP_MAP_NON_CONTIG = 0x100000000000,
7284     /// The 16 MSBs of the flags indicate whether the entry is member of some
7285     /// struct/class.
7286     OMP_MAP_MEMBER_OF = 0xffff000000000000,
7287     LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
7288   };
7289 
7290   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7291   static unsigned getFlagMemberOffset() {
7292     unsigned Offset = 0;
7293     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7294          Remain = Remain >> 1)
7295       Offset++;
7296     return Offset;
7297   }
7298 
7299   /// Class that holds debugging information for a data mapping to be passed to
7300   /// the runtime library.
7301   class MappingExprInfo {
7302     /// The variable declaration used for the data mapping.
7303     const ValueDecl *MapDecl = nullptr;
7304     /// The original expression used in the map clause, or null if there is
7305     /// none.
7306     const Expr *MapExpr = nullptr;
7307 
7308   public:
7309     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7310         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7311 
7312     const ValueDecl *getMapDecl() const { return MapDecl; }
7313     const Expr *getMapExpr() const { return MapExpr; }
7314   };
7315 
7316   /// Class that associates information with a base pointer to be passed to the
7317   /// runtime library.
7318   class BasePointerInfo {
7319     /// The base pointer.
7320     llvm::Value *Ptr = nullptr;
7321     /// The base declaration that refers to this device pointer, or null if
7322     /// there is none.
7323     const ValueDecl *DevPtrDecl = nullptr;
7324 
7325   public:
7326     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7327         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7328     llvm::Value *operator*() const { return Ptr; }
7329     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7330     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7331   };
7332 
7333   using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7334   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7335   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7336   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7337   using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
7338   using MapDimArrayTy = SmallVector<uint64_t, 4>;
7339   using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7340 
7341   /// This structure contains combined information generated for mappable
7342   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7343   /// mappers, and non-contiguous information.
7344   struct MapCombinedInfoTy {
7345     struct StructNonContiguousInfo {
7346       bool IsNonContiguous = false;
7347       MapDimArrayTy Dims;
7348       MapNonContiguousArrayTy Offsets;
7349       MapNonContiguousArrayTy Counts;
7350       MapNonContiguousArrayTy Strides;
7351     };
7352     MapExprsArrayTy Exprs;
7353     MapBaseValuesArrayTy BasePointers;
7354     MapValuesArrayTy Pointers;
7355     MapValuesArrayTy Sizes;
7356     MapFlagsArrayTy Types;
7357     MapMappersArrayTy Mappers;
7358     StructNonContiguousInfo NonContigInfo;
7359 
7360     /// Append arrays in \a CurInfo.
7361     void append(MapCombinedInfoTy &CurInfo) {
7362       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7363       BasePointers.append(CurInfo.BasePointers.begin(),
7364                           CurInfo.BasePointers.end());
7365       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7366       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7367       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7368       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7369       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7370                                  CurInfo.NonContigInfo.Dims.end());
7371       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7372                                     CurInfo.NonContigInfo.Offsets.end());
7373       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7374                                    CurInfo.NonContigInfo.Counts.end());
7375       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7376                                     CurInfo.NonContigInfo.Strides.end());
7377     }
7378   };
7379 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Map data set aside for the struct. generateInfoForComponentList swaps
    /// the current combined info into this member when it handles a base
    /// element that has overlapped elements.
    MapCombinedInfoTy PreliminaryMapData;
    /// Lowest mapped element as (field index, address). The address is
    /// invalid until an element has been recorded.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element as (field index, address). The address is
    /// invalid until an element has been recorded.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base pointer of the struct; invalid until it has been set.
    Address Base = Address::invalid();
    /// Address stored next to Base when the struct is recorded.
    /// NOTE(review): presumably the lower bound of the mapped data - confirm.
    Address LB = Address::invalid();
    /// NOTE(review): presumably true when the struct is reached through an
    /// array section; not set in the code visible here - confirm.
    bool IsArraySection = false;
    /// NOTE(review): presumably true once the whole record has been mapped;
    /// not set in the code visible here - confirm.
    bool HasCompleteRecord = false;
  };
7395 
7396 private:
  /// Bundles one list of mappable-expression components with the map type,
  /// map and motion modifiers, optional user-defined mapper, optional
  /// referenced expression and the flags that accompany it.
  ///
  /// MapModifiers and MotionModifiers are stored as ArrayRef, so the storage
  /// they refer to is not owned by this struct.
  struct MapInfo {
    /// Components of the mapped expression.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map clause kind; OMPC_MAP_unknown when default-constructed.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// NOTE(review): presumably requests that the device pointer be returned
    /// for this entry - confirm against the users of this struct.
    bool ReturnDevicePointer = false;
    /// NOTE(review): presumably true when the map was not written explicitly
    /// by the user - confirm.
    bool IsImplicit = false;
    /// User-defined mapper declaration, or null if there is none.
    const ValueDecl *Mapper = nullptr;
    /// Expression associated with this entry, or null.
    /// NOTE(review): presumably the variable reference from the clause.
    const Expr *VarRef = nullptr;
    /// NOTE(review): presumably true for entries coming from
    /// use_device_addr - confirm.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    /// Initialize every member from the corresponding argument.
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7423 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed. This struct
  /// records one such deferred entry.
  struct DeferredDevicePtrEntryTy {
    /// Expression of the deferred entry.
    /// NOTE(review): presumably the expression from the clause - confirm.
    const Expr *IE = nullptr;
    /// Declaration the deferred entry refers to.
    const ValueDecl *VD = nullptr;
    /// NOTE(review): presumably true for use_device_addr and false for
    /// use_device_ptr - confirm against the code that creates entries.
    bool ForDeviceAddr = false;

    /// Initialize every member from the corresponding argument.
    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7436 
  /// The target directive from which the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all firstprivate variables in the current directive.
  /// The bool value is true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7457 
7458   llvm::Value *getExprTypeSize(const Expr *E) const {
7459     QualType ExprTy = E->getType().getCanonicalType();
7460 
7461     // Calculate the size for array shaping expression.
7462     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7463       llvm::Value *Size =
7464           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7465       for (const Expr *SE : OAE->getDimensions()) {
7466         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7467         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7468                                       CGF.getContext().getSizeType(),
7469                                       SE->getExprLoc());
7470         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7471       }
7472       return Size;
7473     }
7474 
7475     // Reference types are ignored for mapping purposes.
7476     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7477       ExprTy = RefTy->getPointeeType().getCanonicalType();
7478 
7479     // Given that an array section is considered a built-in type, we need to
7480     // do the calculation based on the length of the section instead of relying
7481     // on CGF.getTypeSize(E->getType()).
7482     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7483       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7484                             OAE->getBase()->IgnoreParenImpCasts())
7485                             .getCanonicalType();
7486 
7487       // If there is no length associated with the expression and lower bound is
7488       // not specified too, that means we are using the whole length of the
7489       // base.
7490       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7491           !OAE->getLowerBound())
7492         return CGF.getTypeSize(BaseTy);
7493 
7494       llvm::Value *ElemSize;
7495       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7496         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7497       } else {
7498         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7499         assert(ATy && "Expecting array type if not a pointer type.");
7500         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7501       }
7502 
7503       // If we don't have a length at this point, that is because we have an
7504       // array section with a single element.
7505       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7506         return ElemSize;
7507 
7508       if (const Expr *LenExpr = OAE->getLength()) {
7509         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7510         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7511                                              CGF.getContext().getSizeType(),
7512                                              LenExpr->getExprLoc());
7513         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7514       }
7515       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7516              OAE->getLowerBound() && "expected array_section[lb:].");
7517       // Size = sizetype - lb * elemtype;
7518       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7519       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7520       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7521                                        CGF.getContext().getSizeType(),
7522                                        OAE->getLowerBound()->getExprLoc());
7523       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7524       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7525       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7526       LengthVal = CGF.Builder.CreateSelect(
7527           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7528       return LengthVal;
7529     }
7530     return CGF.getTypeSize(ExprTy);
7531   }
7532 
7533   /// Return the corresponding bits for a given map clause modifier. Add
7534   /// a flag marking the map as a pointer if requested. Add a flag marking the
7535   /// map as the first one of a series of maps that relate to the same map
7536   /// expression.
7537   OpenMPOffloadMappingFlags getMapTypeBits(
7538       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7539       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7540       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7541     OpenMPOffloadMappingFlags Bits =
7542         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7543     switch (MapType) {
7544     case OMPC_MAP_alloc:
7545     case OMPC_MAP_release:
7546       // alloc and release is the default behavior in the runtime library,  i.e.
7547       // if we don't pass any bits alloc/release that is what the runtime is
7548       // going to do. Therefore, we don't need to signal anything for these two
7549       // type modifiers.
7550       break;
7551     case OMPC_MAP_to:
7552       Bits |= OMP_MAP_TO;
7553       break;
7554     case OMPC_MAP_from:
7555       Bits |= OMP_MAP_FROM;
7556       break;
7557     case OMPC_MAP_tofrom:
7558       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7559       break;
7560     case OMPC_MAP_delete:
7561       Bits |= OMP_MAP_DELETE;
7562       break;
7563     case OMPC_MAP_unknown:
7564       llvm_unreachable("Unexpected map type!");
7565     }
7566     if (AddPtrFlag)
7567       Bits |= OMP_MAP_PTR_AND_OBJ;
7568     if (AddIsTargetParamFlag)
7569       Bits |= OMP_MAP_TARGET_PARAM;
7570     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7571         != MapModifiers.end())
7572       Bits |= OMP_MAP_ALWAYS;
7573     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7574         != MapModifiers.end())
7575       Bits |= OMP_MAP_CLOSE;
7576     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) !=
7577             MapModifiers.end() ||
7578         llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) !=
7579             MotionModifiers.end())
7580       Bits |= OMP_MAP_PRESENT;
7581     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold) !=
7582         MapModifiers.end())
7583       Bits |= OMP_MAP_OMPX_HOLD;
7584     if (IsNonContiguous)
7585       Bits |= OMP_MAP_NON_CONTIG;
7586     return Bits;
7587   }
7588 
7589   /// Return true if the provided expression is a final array section. A
7590   /// final array section, is one whose length can't be proved to be one.
7591   bool isFinalArraySectionExpression(const Expr *E) const {
7592     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7593 
7594     // It is not an array section and therefore not a unity-size one.
7595     if (!OASE)
7596       return false;
7597 
7598     // An array section with no colon always refer to a single element.
7599     if (OASE->getColonLocFirst().isInvalid())
7600       return false;
7601 
7602     const Expr *Length = OASE->getLength();
7603 
7604     // If we don't have a length we have to check if the array has size 1
7605     // for this dimension. Also, we should always expect a length if the
7606     // base type is pointer.
7607     if (!Length) {
7608       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7609                              OASE->getBase()->IgnoreParenImpCasts())
7610                              .getCanonicalType();
7611       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7612         return ATy->getSize().getSExtValue() != 1;
7613       // If we don't have a constant dimension length, we have to consider
7614       // the current section as having any size, so it is not necessarily
7615       // unitary. If it happen to be unity size, that's user fault.
7616       return true;
7617     }
7618 
7619     // Check if the length evaluates to 1.
7620     Expr::EvalResult Result;
7621     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7622       return true; // Can have more that size 1.
7623 
7624     llvm::APSInt ConstLength = Result.Val.getInt();
7625     return ConstLength.getSExtValue() != 1;
7626   }
7627 
7628   /// Generate the base pointers, section pointers, sizes, map type bits, and
7629   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7630   /// map type, map or motion modifiers, and expression components.
7631   /// \a IsFirstComponent should be set to true if the provided set of
7632   /// components is the first associated with a capture.
7633   void generateInfoForComponentList(
7634       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7635       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7636       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7637       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7638       bool IsFirstComponentList, bool IsImplicit,
7639       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7640       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7641       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7642           OverlappedElements = llvm::None) const {
7643     // The following summarizes what has to be generated for each map and the
7644     // types below. The generated information is expressed in this order:
7645     // base pointer, section pointer, size, flags
7646     // (to add to the ones that come from the map type and modifier).
7647     //
7648     // double d;
7649     // int i[100];
7650     // float *p;
7651     //
7652     // struct S1 {
7653     //   int i;
7654     //   float f[50];
7655     // }
7656     // struct S2 {
7657     //   int i;
7658     //   float f[50];
7659     //   S1 s;
7660     //   double *p;
7661     //   struct S2 *ps;
7662     //   int &ref;
7663     // }
7664     // S2 s;
7665     // S2 *ps;
7666     //
7667     // map(d)
7668     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7669     //
7670     // map(i)
7671     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7672     //
7673     // map(i[1:23])
7674     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7675     //
7676     // map(p)
7677     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7678     //
7679     // map(p[1:24])
7680     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7681     // in unified shared memory mode or for local pointers
7682     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7683     //
7684     // map(s)
7685     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7686     //
7687     // map(s.i)
7688     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7689     //
7690     // map(s.s.f)
7691     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7692     //
7693     // map(s.p)
7694     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7695     //
7696     // map(to: s.p[:22])
7697     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7698     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7699     // &(s.p), &(s.p[0]), 22*sizeof(double),
7700     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7701     // (*) alloc space for struct members, only this is a target parameter
7702     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7703     //      optimizes this entry out, same in the examples below)
7704     // (***) map the pointee (map: to)
7705     //
7706     // map(to: s.ref)
7707     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7708     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7709     // (*) alloc space for struct members, only this is a target parameter
7710     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7711     //      optimizes this entry out, same in the examples below)
7712     // (***) map the pointee (map: to)
7713     //
7714     // map(s.ps)
7715     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7716     //
7717     // map(from: s.ps->s.i)
7718     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7719     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7720     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7721     //
7722     // map(to: s.ps->ps)
7723     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7724     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7725     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7726     //
7727     // map(s.ps->ps->ps)
7728     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7729     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7730     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7731     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7732     //
7733     // map(to: s.ps->ps->s.f[:22])
7734     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7735     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7736     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7737     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7738     //
7739     // map(ps)
7740     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7741     //
7742     // map(ps->i)
7743     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7744     //
7745     // map(ps->s.f)
7746     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7747     //
7748     // map(from: ps->p)
7749     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7750     //
7751     // map(to: ps->p[:22])
7752     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7753     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7754     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7755     //
7756     // map(ps->ps)
7757     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7758     //
7759     // map(from: ps->ps->s.i)
7760     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7761     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7762     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7763     //
7764     // map(from: ps->ps->ps)
7765     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7766     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7767     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7768     //
7769     // map(ps->ps->ps->ps)
7770     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7771     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7772     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7773     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7774     //
7775     // map(to: ps->ps->ps->s.f[:22])
7776     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7777     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7778     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7779     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7780     //
7781     // map(to: s.f[:22]) map(from: s.p[:33])
7782     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
7784     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7785     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7786     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7787     // (*) allocate contiguous space needed to fit all mapped members even if
7788     //     we allocate space for members not mapped (in this example,
7789     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7790     //     them as well because they fall between &s.f[0] and &s.p)
7791     //
7792     // map(from: s.f[:22]) map(to: ps->p[:33])
7793     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7794     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7795     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7796     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7797     // (*) the struct this entry pertains to is the 2nd element in the list of
7798     //     arguments, hence MEMBER_OF(2)
7799     //
7800     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7801     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7802     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7803     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7804     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7805     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7806     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7807     // (*) the struct this entry pertains to is the 4th element in the list
7808     //     of arguments, hence MEMBER_OF(4)
7809 
7810     // Track if the map information being generated is the first for a capture.
7811     bool IsCaptureFirstInfo = IsFirstComponentList;
7812     // When the variable is on a declare target link or in a to clause with
7813     // unified memory, a reference is needed to hold the host/device address
7814     // of the variable.
7815     bool RequiresReference = false;
7816 
7817     // Scan the components from the base to the complete expression.
7818     auto CI = Components.rbegin();
7819     auto CE = Components.rend();
7820     auto I = CI;
7821 
7822     // Track if the map information being generated is the first for a list of
7823     // components.
7824     bool IsExpressionFirstInfo = true;
7825     bool FirstPointerInComplexData = false;
7826     Address BP = Address::invalid();
7827     const Expr *AssocExpr = I->getAssociatedExpression();
7828     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7829     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7830     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7831 
7832     if (isa<MemberExpr>(AssocExpr)) {
7833       // The base is the 'this' pointer. The content of the pointer is going
7834       // to be the base of the field being mapped.
7835       BP = CGF.LoadCXXThisAddress();
7836     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7837                (OASE &&
7838                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7839       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7840     } else if (OAShE &&
7841                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7842       BP = Address(
7843           CGF.EmitScalarExpr(OAShE->getBase()),
7844           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7845     } else {
7846       // The base is the reference to the variable.
7847       // BP = &Var.
7848       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7849       if (const auto *VD =
7850               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7851         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7852                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7853           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7854               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7855                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7856             RequiresReference = true;
7857             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7858           }
7859         }
7860       }
7861 
7862       // If the variable is a pointer and is being dereferenced (i.e. is not
7863       // the last component), the base has to be the pointer itself, not its
7864       // reference. References are ignored for mapping purposes.
7865       QualType Ty =
7866           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7867       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7868         // No need to generate individual map information for the pointer, it
7869         // can be associated with the combined storage if shared memory mode is
7870         // active or the base declaration is not global variable.
7871         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7872         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7873             !VD || VD->hasLocalStorage())
7874           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7875         else
7876           FirstPointerInComplexData = true;
7877         ++I;
7878       }
7879     }
7880 
7881     // Track whether a component of the list should be marked as MEMBER_OF some
7882     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7883     // in a component list should be marked as MEMBER_OF, all subsequent entries
7884     // do not belong to the base struct. E.g.
7885     // struct S2 s;
7886     // s.ps->ps->ps->f[:]
7887     //   (1) (2) (3) (4)
7888     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7889     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7890     // is the pointee of ps(2) which is not member of struct s, so it should not
7891     // be marked as such (it is still PTR_AND_OBJ).
7892     // The variable is initialized to false so that PTR_AND_OBJ entries which
7893     // are not struct members are not considered (e.g. array of pointers to
7894     // data).
7895     bool ShouldBeMemberOf = false;
7896 
7897     // Variable keeping track of whether or not we have encountered a component
7898     // in the component list which is a member expression. Useful when we have a
7899     // pointer or a final array section, in which case it is the previous
7900     // component in the list which tells us whether we have a member expression.
7901     // E.g. X.f[:]
7902     // While processing the final array section "[:]" it is "f" which tells us
7903     // whether we are dealing with a member of a declared struct.
7904     const MemberExpr *EncounteredME = nullptr;
7905 
7906     // Track for the total number of dimension. Start from one for the dummy
7907     // dimension.
7908     uint64_t DimSize = 1;
7909 
7910     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7911     bool IsPrevMemberReference = false;
7912 
7913     for (; I != CE; ++I) {
7914       // If the current component is member of a struct (parent struct) mark it.
7915       if (!EncounteredME) {
7916         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7917         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7918         // as MEMBER_OF the parent struct.
7919         if (EncounteredME) {
7920           ShouldBeMemberOf = true;
7921           // Do not emit as complex pointer if this is actually not array-like
7922           // expression.
7923           if (FirstPointerInComplexData) {
7924             QualType Ty = std::prev(I)
7925                               ->getAssociatedDeclaration()
7926                               ->getType()
7927                               .getNonReferenceType();
7928             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7929             FirstPointerInComplexData = false;
7930           }
7931         }
7932       }
7933 
7934       auto Next = std::next(I);
7935 
7936       // We need to generate the addresses and sizes if this is the last
7937       // component, if the component is a pointer or if it is an array section
7938       // whose length can't be proved to be one. If this is a pointer, it
7939       // becomes the base address for the following components.
7940 
7941       // A final array section, is one whose length can't be proved to be one.
7942       // If the map item is non-contiguous then we don't treat any array section
7943       // as final array section.
7944       bool IsFinalArraySection =
7945           !IsNonContiguous &&
7946           isFinalArraySectionExpression(I->getAssociatedExpression());
7947 
7948       // If we have a declaration for the mapping use that, otherwise use
7949       // the base declaration of the map clause.
7950       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7951                                      ? I->getAssociatedDeclaration()
7952                                      : BaseDecl;
7953       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7954                                                : MapExpr;
7955 
7956       // Get information on whether the element is a pointer. Have to do a
7957       // special treatment for array sections given that they are built-in
7958       // types.
7959       const auto *OASE =
7960           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7961       const auto *OAShE =
7962           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7963       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7964       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7965       bool IsPointer =
7966           OAShE ||
7967           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7968                        .getCanonicalType()
7969                        ->isAnyPointerType()) ||
7970           I->getAssociatedExpression()->getType()->isAnyPointerType();
7971       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7972                                MapDecl &&
7973                                MapDecl->getType()->isLValueReferenceType();
7974       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7975 
7976       if (OASE)
7977         ++DimSize;
7978 
7979       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7980           IsFinalArraySection) {
7981         // If this is not the last component, we expect the pointer to be
7982         // associated with an array expression or member expression.
7983         assert((Next == CE ||
7984                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7985                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7986                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7987                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7988                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7989                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7990                "Unexpected expression");
7991 
7992         Address LB = Address::invalid();
7993         Address LowestElem = Address::invalid();
7994         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7995                                        const MemberExpr *E) {
7996           const Expr *BaseExpr = E->getBase();
7997           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7998           // scalar.
7999           LValue BaseLV;
8000           if (E->isArrow()) {
8001             LValueBaseInfo BaseInfo;
8002             TBAAAccessInfo TBAAInfo;
8003             Address Addr =
8004                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8005             QualType PtrTy = BaseExpr->getType()->getPointeeType();
8006             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8007           } else {
8008             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8009           }
8010           return BaseLV;
8011         };
8012         if (OAShE) {
8013           LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
8014                                     CGF.getContext().getTypeAlignInChars(
8015                                         OAShE->getBase()->getType()));
8016         } else if (IsMemberReference) {
8017           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8018           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8019           LowestElem = CGF.EmitLValueForFieldInitialization(
8020                               BaseLVal, cast<FieldDecl>(MapDecl))
8021                            .getAddress(CGF);
8022           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8023                    .getAddress(CGF);
8024         } else {
8025           LowestElem = LB =
8026               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8027                   .getAddress(CGF);
8028         }
8029 
8030         // If this component is a pointer inside the base struct then we don't
8031         // need to create any entry for it - it will be combined with the object
8032         // it is pointing to into a single PTR_AND_OBJ entry.
8033         bool IsMemberPointerOrAddr =
8034             EncounteredME &&
8035             (((IsPointer || ForDeviceAddr) &&
8036               I->getAssociatedExpression() == EncounteredME) ||
8037              (IsPrevMemberReference && !IsPointer) ||
8038              (IsMemberReference && Next != CE &&
8039               !Next->getAssociatedExpression()->getType()->isPointerType()));
8040         if (!OverlappedElements.empty() && Next == CE) {
8041           // Handle base element with the info for overlapped elements.
8042           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8043           assert(!IsPointer &&
8044                  "Unexpected base element with the pointer type.");
8045           // Mark the whole struct as the struct that requires allocation on the
8046           // device.
8047           PartialStruct.LowestElem = {0, LowestElem};
8048           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8049               I->getAssociatedExpression()->getType());
8050           Address HB = CGF.Builder.CreateConstGEP(
8051               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
8052                                                               CGF.VoidPtrTy),
8053               TypeSize.getQuantity() - 1);
8054           PartialStruct.HighestElem = {
8055               std::numeric_limits<decltype(
8056                   PartialStruct.HighestElem.first)>::max(),
8057               HB};
8058           PartialStruct.Base = BP;
8059           PartialStruct.LB = LB;
8060           assert(
8061               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8062               "Overlapped elements must be used only once for the variable.");
8063           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8064           // Emit data for non-overlapped data.
8065           OpenMPOffloadMappingFlags Flags =
8066               OMP_MAP_MEMBER_OF |
8067               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8068                              /*AddPtrFlag=*/false,
8069                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8070           llvm::Value *Size = nullptr;
8071           // Do bitcopy of all non-overlapped structure elements.
8072           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8073                    Component : OverlappedElements) {
8074             Address ComponentLB = Address::invalid();
8075             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8076                  Component) {
8077               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8078                 const auto *FD = dyn_cast<FieldDecl>(VD);
8079                 if (FD && FD->getType()->isLValueReferenceType()) {
8080                   const auto *ME =
8081                       cast<MemberExpr>(MC.getAssociatedExpression());
8082                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8083                   ComponentLB =
8084                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8085                           .getAddress(CGF);
8086                 } else {
8087                   ComponentLB =
8088                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8089                           .getAddress(CGF);
8090                 }
8091                 Size = CGF.Builder.CreatePtrDiff(
8092                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8093                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8094                 break;
8095               }
8096             }
8097             assert(Size && "Failed to determine structure size");
8098             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8099             CombinedInfo.BasePointers.push_back(BP.getPointer());
8100             CombinedInfo.Pointers.push_back(LB.getPointer());
8101             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8102                 Size, CGF.Int64Ty, /*isSigned=*/true));
8103             CombinedInfo.Types.push_back(Flags);
8104             CombinedInfo.Mappers.push_back(nullptr);
8105             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8106                                                                       : 1);
8107             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8108           }
8109           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8110           CombinedInfo.BasePointers.push_back(BP.getPointer());
8111           CombinedInfo.Pointers.push_back(LB.getPointer());
8112           Size = CGF.Builder.CreatePtrDiff(
8113               CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8114               CGF.EmitCastToVoidPtr(LB.getPointer()));
8115           CombinedInfo.Sizes.push_back(
8116               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8117           CombinedInfo.Types.push_back(Flags);
8118           CombinedInfo.Mappers.push_back(nullptr);
8119           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8120                                                                     : 1);
8121           break;
8122         }
8123         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8124         if (!IsMemberPointerOrAddr ||
8125             (Next == CE && MapType != OMPC_MAP_unknown)) {
8126           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8127           CombinedInfo.BasePointers.push_back(BP.getPointer());
8128           CombinedInfo.Pointers.push_back(LB.getPointer());
8129           CombinedInfo.Sizes.push_back(
8130               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8131           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8132                                                                     : 1);
8133 
8134           // If Mapper is valid, the last component inherits the mapper.
8135           bool HasMapper = Mapper && Next == CE;
8136           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8137 
8138           // We need to add a pointer flag for each map that comes from the
8139           // same expression except for the first one. We also need to signal
8140           // this map is the first one that relates with the current capture
8141           // (there is a set of entries for each capture).
8142           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8143               MapType, MapModifiers, MotionModifiers, IsImplicit,
8144               !IsExpressionFirstInfo || RequiresReference ||
8145                   FirstPointerInComplexData || IsMemberReference,
8146               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8147 
8148           if (!IsExpressionFirstInfo || IsMemberReference) {
8149             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8150             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8151             if (IsPointer || (IsMemberReference && Next != CE))
8152               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8153                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8154 
8155             if (ShouldBeMemberOf) {
8156               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8157               // should be later updated with the correct value of MEMBER_OF.
8158               Flags |= OMP_MAP_MEMBER_OF;
8159               // From now on, all subsequent PTR_AND_OBJ entries should not be
8160               // marked as MEMBER_OF.
8161               ShouldBeMemberOf = false;
8162             }
8163           }
8164 
8165           CombinedInfo.Types.push_back(Flags);
8166         }
8167 
8168         // If we have encountered a member expression so far, keep track of the
8169         // mapped member. If the parent is "*this", then the value declaration
8170         // is nullptr.
8171         if (EncounteredME) {
8172           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8173           unsigned FieldIndex = FD->getFieldIndex();
8174 
8175           // Update info about the lowest and highest elements for this struct
8176           if (!PartialStruct.Base.isValid()) {
8177             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8178             if (IsFinalArraySection) {
8179               Address HB =
8180                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8181                       .getAddress(CGF);
8182               PartialStruct.HighestElem = {FieldIndex, HB};
8183             } else {
8184               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8185             }
8186             PartialStruct.Base = BP;
8187             PartialStruct.LB = BP;
8188           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8189             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8190           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8191             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8192           }
8193         }
8194 
8195         // Need to emit combined struct for array sections.
8196         if (IsFinalArraySection || IsNonContiguous)
8197           PartialStruct.IsArraySection = true;
8198 
8199         // If we have a final array section, we are done with this expression.
8200         if (IsFinalArraySection)
8201           break;
8202 
8203         // The pointer becomes the base for the next element.
8204         if (Next != CE)
8205           BP = IsMemberReference ? LowestElem : LB;
8206 
8207         IsExpressionFirstInfo = false;
8208         IsCaptureFirstInfo = false;
8209         FirstPointerInComplexData = false;
8210         IsPrevMemberReference = IsMemberReference;
8211       } else if (FirstPointerInComplexData) {
8212         QualType Ty = Components.rbegin()
8213                           ->getAssociatedDeclaration()
8214                           ->getType()
8215                           .getNonReferenceType();
8216         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8217         FirstPointerInComplexData = false;
8218       }
8219     }
8220     // If ran into the whole component - allocate the space for the whole
8221     // record.
8222     if (!EncounteredME)
8223       PartialStruct.HasCompleteRecord = true;
8224 
8225     if (!IsNonContiguous)
8226       return;
8227 
8228     const ASTContext &Context = CGF.getContext();
8229 
8230     // For supporting stride in array section, we need to initialize the first
8231     // dimension size as 1, first offset as 0, and first count as 1
8232     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8233     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8234     MapValuesArrayTy CurStrides;
8235     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8236     uint64_t ElementTypeSize;
8237 
8238     // Collect Size information for each dimension and get the element size as
8239     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8240     // should be [10, 10] and the first stride is 4 bytes.
8241     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8242          Components) {
8243       const Expr *AssocExpr = Component.getAssociatedExpression();
8244       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8245 
8246       if (!OASE)
8247         continue;
8248 
8249       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8250       auto *CAT = Context.getAsConstantArrayType(Ty);
8251       auto *VAT = Context.getAsVariableArrayType(Ty);
8252 
8253       // We need all the dimension size except for the last dimension.
8254       assert((VAT || CAT || &Component == &*Components.begin()) &&
8255              "Should be either ConstantArray or VariableArray if not the "
8256              "first Component");
8257 
8258       // Get element size if CurStrides is empty.
8259       if (CurStrides.empty()) {
8260         const Type *ElementType = nullptr;
8261         if (CAT)
8262           ElementType = CAT->getElementType().getTypePtr();
8263         else if (VAT)
8264           ElementType = VAT->getElementType().getTypePtr();
8265         else
8266           assert(&Component == &*Components.begin() &&
8267                  "Only expect pointer (non CAT or VAT) when this is the "
8268                  "first Component");
8269         // If ElementType is null, then it means the base is a pointer
8270         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8271         // for next iteration.
8272         if (ElementType) {
8273           // For the case that having pointer as base, we need to remove one
8274           // level of indirection.
8275           if (&Component != &*Components.begin())
8276             ElementType = ElementType->getPointeeOrArrayElementType();
8277           ElementTypeSize =
8278               Context.getTypeSizeInChars(ElementType).getQuantity();
8279           CurStrides.push_back(
8280               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8281         }
8282       }
8283       // Get dimension value except for the last dimension since we don't need
8284       // it.
8285       if (DimSizes.size() < Components.size() - 1) {
8286         if (CAT)
8287           DimSizes.push_back(llvm::ConstantInt::get(
8288               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8289         else if (VAT)
8290           DimSizes.push_back(CGF.Builder.CreateIntCast(
8291               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8292               /*IsSigned=*/false));
8293       }
8294     }
8295 
8296     // Skip the dummy dimension since we already have its information.
8297     auto DI = DimSizes.begin() + 1;
8298     // Product of dimension.
8299     llvm::Value *DimProd =
8300         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8301 
8302     // Collect info for non-contiguous. Notice that offset, count, and stride
8303     // are only meaningful for array-section, so we insert a null for anything
8304     // other than array-section.
8305     // Also, the size of offset, count, and stride are not the same as
8306     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8307     // count, and stride are the same as the number of non-contiguous
8308     // declaration in target update to/from clause.
8309     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8310          Components) {
8311       const Expr *AssocExpr = Component.getAssociatedExpression();
8312 
8313       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8314         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8315             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8316             /*isSigned=*/false);
8317         CurOffsets.push_back(Offset);
8318         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8319         CurStrides.push_back(CurStrides.back());
8320         continue;
8321       }
8322 
8323       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8324 
8325       if (!OASE)
8326         continue;
8327 
8328       // Offset
8329       const Expr *OffsetExpr = OASE->getLowerBound();
8330       llvm::Value *Offset = nullptr;
8331       if (!OffsetExpr) {
8332         // If offset is absent, then we just set it to zero.
8333         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8334       } else {
8335         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8336                                            CGF.Int64Ty,
8337                                            /*isSigned=*/false);
8338       }
8339       CurOffsets.push_back(Offset);
8340 
8341       // Count
8342       const Expr *CountExpr = OASE->getLength();
8343       llvm::Value *Count = nullptr;
8344       if (!CountExpr) {
8345         // In Clang, once a high dimension is an array section, we construct all
8346         // the lower dimension as array section, however, for case like
8347         // arr[0:2][2], Clang construct the inner dimension as an array section
8348         // but it actually is not in an array section form according to spec.
8349         if (!OASE->getColonLocFirst().isValid() &&
8350             !OASE->getColonLocSecond().isValid()) {
8351           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8352         } else {
8353           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8354           // When the length is absent it defaults to ⌈(size −
8355           // lower-bound)/stride⌉, where size is the size of the array
8356           // dimension.
8357           const Expr *StrideExpr = OASE->getStride();
8358           llvm::Value *Stride =
8359               StrideExpr
8360                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8361                                               CGF.Int64Ty, /*isSigned=*/false)
8362                   : nullptr;
8363           if (Stride)
8364             Count = CGF.Builder.CreateUDiv(
8365                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8366           else
8367             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8368         }
8369       } else {
8370         Count = CGF.EmitScalarExpr(CountExpr);
8371       }
8372       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8373       CurCounts.push_back(Count);
8374 
8375       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8376       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8377       //              Offset      Count     Stride
8378       //    D0          0           1         4    (int)    <- dummy dimension
8379       //    D1          0           2         8    (2 * (1) * 4)
8380       //    D2          1           2         20   (1 * (1 * 5) * 4)
8381       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8382       const Expr *StrideExpr = OASE->getStride();
8383       llvm::Value *Stride =
8384           StrideExpr
8385               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8386                                           CGF.Int64Ty, /*isSigned=*/false)
8387               : nullptr;
8388       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8389       if (Stride)
8390         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8391       else
8392         CurStrides.push_back(DimProd);
8393       if (DI != DimSizes.end())
8394         ++DI;
8395     }
8396 
8397     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8398     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8399     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8400   }
8401 
8402   /// Return the adjusted map modifiers if the declaration a capture refers to
8403   /// appears in a first-private clause. This is expected to be used only with
8404   /// directives that start with 'target'.
8405   MappableExprsHandler::OpenMPOffloadMappingFlags
8406   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8407     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8408 
8409     // A first private variable captured by reference will use only the
8410     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8411     // declaration is known as first-private in this handler.
8412     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8413       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8414         return MappableExprsHandler::OMP_MAP_TO |
8415                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8416       return MappableExprsHandler::OMP_MAP_PRIVATE |
8417              MappableExprsHandler::OMP_MAP_TO;
8418     }
8419     return MappableExprsHandler::OMP_MAP_TO |
8420            MappableExprsHandler::OMP_MAP_FROM;
8421   }
8422 
8423   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8424     // Rotate by getFlagMemberOffset() bits.
8425     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8426                                                   << getFlagMemberOffset());
8427   }
8428 
8429   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8430                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8431     // If the entry is PTR_AND_OBJ but has not been marked with the special
8432     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8433     // marked as MEMBER_OF.
8434     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8435         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8436       return;
8437 
8438     // Reset the placeholder value to prepare the flag for the assignment of the
8439     // proper MEMBER_OF value.
8440     Flags &= ~OMP_MAP_MEMBER_OF;
8441     Flags |= MemberOfFlag;
8442   }
8443 
8444   void getPlainLayout(const CXXRecordDecl *RD,
8445                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8446                       bool AsBase) const {
8447     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8448 
8449     llvm::StructType *St =
8450         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8451 
8452     unsigned NumElements = St->getNumElements();
8453     llvm::SmallVector<
8454         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8455         RecordLayout(NumElements);
8456 
8457     // Fill bases.
8458     for (const auto &I : RD->bases()) {
8459       if (I.isVirtual())
8460         continue;
8461       const auto *Base = I.getType()->getAsCXXRecordDecl();
8462       // Ignore empty bases.
8463       if (Base->isEmpty() || CGF.getContext()
8464                                  .getASTRecordLayout(Base)
8465                                  .getNonVirtualSize()
8466                                  .isZero())
8467         continue;
8468 
8469       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8470       RecordLayout[FieldIndex] = Base;
8471     }
8472     // Fill in virtual bases.
8473     for (const auto &I : RD->vbases()) {
8474       const auto *Base = I.getType()->getAsCXXRecordDecl();
8475       // Ignore empty bases.
8476       if (Base->isEmpty())
8477         continue;
8478       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8479       if (RecordLayout[FieldIndex])
8480         continue;
8481       RecordLayout[FieldIndex] = Base;
8482     }
8483     // Fill in all the fields.
8484     assert(!RD->isUnion() && "Unexpected union.");
8485     for (const auto *Field : RD->fields()) {
8486       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8487       // will fill in later.)
8488       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8489         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8490         RecordLayout[FieldIndex] = Field;
8491       }
8492     }
8493     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8494              &Data : RecordLayout) {
8495       if (Data.isNull())
8496         continue;
8497       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8498         getPlainLayout(Base, Layout, /*AsBase=*/true);
8499       else
8500         Layout.push_back(Data.get<const FieldDecl *>());
8501     }
8502   }
8503 
8504   /// Generate all the base pointers, section pointers, sizes, map types, and
8505   /// mappers for the extracted mappable expressions (all included in \a
8506   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8507   /// pair of the relevant declaration and index where it occurs is appended to
8508   /// the device pointers info array.
8509   void generateAllInfoForClauses(
8510       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8511       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8512           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8513     // We have to process the component lists that relate with the same
8514     // declaration in a single chunk so that we can generate the map flags
8515     // correctly. Therefore, we organize all lists in a map.
8516     enum MapKind { Present, Allocs, Other, Total };
8517     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8518                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8519         Info;
8520 
8521     // Helper function to fill the information map for the different supported
8522     // clauses.
8523     auto &&InfoGen =
8524         [&Info, &SkipVarSet](
8525             const ValueDecl *D, MapKind Kind,
8526             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8527             OpenMPMapClauseKind MapType,
8528             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8529             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8530             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8531             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8532           if (SkipVarSet.contains(D))
8533             return;
8534           auto It = Info.find(D);
8535           if (It == Info.end())
8536             It = Info
8537                      .insert(std::make_pair(
8538                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8539                      .first;
8540           It->second[Kind].emplace_back(
8541               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8542               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8543         };
8544 
8545     for (const auto *Cl : Clauses) {
8546       const auto *C = dyn_cast<OMPMapClause>(Cl);
8547       if (!C)
8548         continue;
8549       MapKind Kind = Other;
8550       if (!C->getMapTypeModifiers().empty() &&
8551           llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
8552             return K == OMPC_MAP_MODIFIER_present;
8553           }))
8554         Kind = Present;
8555       else if (C->getMapType() == OMPC_MAP_alloc)
8556         Kind = Allocs;
8557       const auto *EI = C->getVarRefs().begin();
8558       for (const auto L : C->component_lists()) {
8559         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8560         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8561                 C->getMapTypeModifiers(), llvm::None,
8562                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8563                 E);
8564         ++EI;
8565       }
8566     }
8567     for (const auto *Cl : Clauses) {
8568       const auto *C = dyn_cast<OMPToClause>(Cl);
8569       if (!C)
8570         continue;
8571       MapKind Kind = Other;
8572       if (!C->getMotionModifiers().empty() &&
8573           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8574             return K == OMPC_MOTION_MODIFIER_present;
8575           }))
8576         Kind = Present;
8577       const auto *EI = C->getVarRefs().begin();
8578       for (const auto L : C->component_lists()) {
8579         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8580                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8581                 C->isImplicit(), std::get<2>(L), *EI);
8582         ++EI;
8583       }
8584     }
8585     for (const auto *Cl : Clauses) {
8586       const auto *C = dyn_cast<OMPFromClause>(Cl);
8587       if (!C)
8588         continue;
8589       MapKind Kind = Other;
8590       if (!C->getMotionModifiers().empty() &&
8591           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8592             return K == OMPC_MOTION_MODIFIER_present;
8593           }))
8594         Kind = Present;
8595       const auto *EI = C->getVarRefs().begin();
8596       for (const auto L : C->component_lists()) {
8597         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8598                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8599                 C->isImplicit(), std::get<2>(L), *EI);
8600         ++EI;
8601       }
8602     }
8603 
8604     // Look at the use_device_ptr clause information and mark the existing map
8605     // entries as such. If there is no map information for an entry in the
8606     // use_device_ptr list, we create one with map type 'alloc' and zero size
8607     // section. It is the user fault if that was not mapped before. If there is
8608     // no map information and the pointer is a struct member, then we defer the
8609     // emission of that entry until the whole struct has been processed.
8610     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8611                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8612         DeferredInfo;
8613     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8614 
8615     for (const auto *Cl : Clauses) {
8616       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8617       if (!C)
8618         continue;
8619       for (const auto L : C->component_lists()) {
8620         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8621             std::get<1>(L);
8622         assert(!Components.empty() &&
8623                "Not expecting empty list of components!");
8624         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8625         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8626         const Expr *IE = Components.back().getAssociatedExpression();
8627         // If the first component is a member expression, we have to look into
8628         // 'this', which maps to null in the map of map information. Otherwise
8629         // look directly for the information.
8630         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8631 
8632         // We potentially have map information for this declaration already.
8633         // Look for the first set of components that refer to it.
8634         if (It != Info.end()) {
8635           bool Found = false;
8636           for (auto &Data : It->second) {
8637             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8638               return MI.Components.back().getAssociatedDeclaration() == VD;
8639             });
8640             // If we found a map entry, signal that the pointer has to be
8641             // returned and move on to the next declaration. Exclude cases where
8642             // the base pointer is mapped as array subscript, array section or
8643             // array shaping. The base address is passed as a pointer to base in
8644             // this case and cannot be used as a base for use_device_ptr list
8645             // item.
8646             if (CI != Data.end()) {
8647               auto PrevCI = std::next(CI->Components.rbegin());
8648               const auto *VarD = dyn_cast<VarDecl>(VD);
8649               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8650                   isa<MemberExpr>(IE) ||
8651                   !VD->getType().getNonReferenceType()->isPointerType() ||
8652                   PrevCI == CI->Components.rend() ||
8653                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8654                   VarD->hasLocalStorage()) {
8655                 CI->ReturnDevicePointer = true;
8656                 Found = true;
8657                 break;
8658               }
8659             }
8660           }
8661           if (Found)
8662             continue;
8663         }
8664 
8665         // We didn't find any match in our map information - generate a zero
8666         // size array section - if the pointer is a struct member we defer this
8667         // action until the whole struct has been processed.
8668         if (isa<MemberExpr>(IE)) {
8669           // Insert the pointer into Info to be processed by
8670           // generateInfoForComponentList. Because it is a member pointer
8671           // without a pointee, no entry will be generated for it, therefore
8672           // we need to generate one after the whole struct has been processed.
8673           // Nonetheless, generateInfoForComponentList must be called to take
8674           // the pointer into account for the calculation of the range of the
8675           // partial struct.
8676           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8677                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8678                   nullptr);
8679           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8680         } else {
8681           llvm::Value *Ptr =
8682               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8683           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8684           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8685           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8686           UseDevicePtrCombinedInfo.Sizes.push_back(
8687               llvm::Constant::getNullValue(CGF.Int64Ty));
8688           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8689           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8690         }
8691       }
8692     }
8693 
8694     // Look at the use_device_addr clause information and mark the existing map
8695     // entries as such. If there is no map information for an entry in the
8696     // use_device_addr list, we create one with map type 'alloc' and zero size
8697     // section. It is the user fault if that was not mapped before. If there is
8698     // no map information and the pointer is a struct member, then we defer the
8699     // emission of that entry until the whole struct has been processed.
8700     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8701     for (const auto *Cl : Clauses) {
8702       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8703       if (!C)
8704         continue;
8705       for (const auto L : C->component_lists()) {
8706         assert(!std::get<1>(L).empty() &&
8707                "Not expecting empty list of components!");
8708         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8709         if (!Processed.insert(VD).second)
8710           continue;
8711         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8712         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8713         // If the first component is a member expression, we have to look into
8714         // 'this', which maps to null in the map of map information. Otherwise
8715         // look directly for the information.
8716         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8717 
8718         // We potentially have map information for this declaration already.
8719         // Look for the first set of components that refer to it.
8720         if (It != Info.end()) {
8721           bool Found = false;
8722           for (auto &Data : It->second) {
8723             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8724               return MI.Components.back().getAssociatedDeclaration() == VD;
8725             });
8726             // If we found a map entry, signal that the pointer has to be
8727             // returned and move on to the next declaration.
8728             if (CI != Data.end()) {
8729               CI->ReturnDevicePointer = true;
8730               Found = true;
8731               break;
8732             }
8733           }
8734           if (Found)
8735             continue;
8736         }
8737 
8738         // We didn't find any match in our map information - generate a zero
8739         // size array section - if the pointer is a struct member we defer this
8740         // action until the whole struct has been processed.
8741         if (isa<MemberExpr>(IE)) {
8742           // Insert the pointer into Info to be processed by
8743           // generateInfoForComponentList. Because it is a member pointer
8744           // without a pointee, no entry will be generated for it, therefore
8745           // we need to generate one after the whole struct has been processed.
8746           // Nonetheless, generateInfoForComponentList must be called to take
8747           // the pointer into account for the calculation of the range of the
8748           // partial struct.
8749           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8750                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8751                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8752           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8753         } else {
8754           llvm::Value *Ptr;
8755           if (IE->isGLValue())
8756             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8757           else
8758             Ptr = CGF.EmitScalarExpr(IE);
8759           CombinedInfo.Exprs.push_back(VD);
8760           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8761           CombinedInfo.Pointers.push_back(Ptr);
8762           CombinedInfo.Sizes.push_back(
8763               llvm::Constant::getNullValue(CGF.Int64Ty));
8764           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8765           CombinedInfo.Mappers.push_back(nullptr);
8766         }
8767       }
8768     }
8769 
8770     for (const auto &Data : Info) {
8771       StructRangeInfoTy PartialStruct;
8772       // Temporary generated information.
8773       MapCombinedInfoTy CurInfo;
8774       const Decl *D = Data.first;
8775       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8776       for (const auto &M : Data.second) {
8777         for (const MapInfo &L : M) {
8778           assert(!L.Components.empty() &&
8779                  "Not expecting declaration with no component lists.");
8780 
8781           // Remember the current base pointer index.
8782           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8783           CurInfo.NonContigInfo.IsNonContiguous =
8784               L.Components.back().isNonContiguous();
8785           generateInfoForComponentList(
8786               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8787               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8788               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8789 
8790           // If this entry relates with a device pointer, set the relevant
8791           // declaration and add the 'return pointer' flag.
8792           if (L.ReturnDevicePointer) {
8793             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8794                    "Unexpected number of mapped base pointers.");
8795 
8796             const ValueDecl *RelevantVD =
8797                 L.Components.back().getAssociatedDeclaration();
8798             assert(RelevantVD &&
8799                    "No relevant declaration related with device pointer??");
8800 
8801             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8802                 RelevantVD);
8803             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8804           }
8805         }
8806       }
8807 
8808       // Append any pending zero-length pointers which are struct members and
8809       // used with use_device_ptr or use_device_addr.
8810       auto CI = DeferredInfo.find(Data.first);
8811       if (CI != DeferredInfo.end()) {
8812         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8813           llvm::Value *BasePtr;
8814           llvm::Value *Ptr;
8815           if (L.ForDeviceAddr) {
8816             if (L.IE->isGLValue())
8817               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8818             else
8819               Ptr = this->CGF.EmitScalarExpr(L.IE);
8820             BasePtr = Ptr;
8821             // Entry is RETURN_PARAM. Also, set the placeholder value
8822             // MEMBER_OF=FFFF so that the entry is later updated with the
8823             // correct value of MEMBER_OF.
8824             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8825           } else {
8826             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8827             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8828                                              L.IE->getExprLoc());
8829             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8830             // placeholder value MEMBER_OF=FFFF so that the entry is later
8831             // updated with the correct value of MEMBER_OF.
8832             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8833                                     OMP_MAP_MEMBER_OF);
8834           }
8835           CurInfo.Exprs.push_back(L.VD);
8836           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8837           CurInfo.Pointers.push_back(Ptr);
8838           CurInfo.Sizes.push_back(
8839               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8840           CurInfo.Mappers.push_back(nullptr);
8841         }
8842       }
8843       // If there is an entry in PartialStruct it means we have a struct with
8844       // individual members mapped. Emit an extra combined entry.
8845       if (PartialStruct.Base.isValid()) {
8846         CurInfo.NonContigInfo.Dims.push_back(0);
8847         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8848       }
8849 
8850       // We need to append the results of this capture to what we already
8851       // have.
8852       CombinedInfo.append(CurInfo);
8853     }
8854     // Append data for use_device_ptr clauses.
8855     CombinedInfo.append(UseDevicePtrCombinedInfo);
8856   }
8857 
8858 public:
8859   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8860       : CurDir(&Dir), CGF(CGF) {
8861     // Extract firstprivate clause information.
8862     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8863       for (const auto *D : C->varlists())
8864         FirstPrivateDecls.try_emplace(
8865             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8866     // Extract implicit firstprivates from uses_allocators clauses.
8867     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8868       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8869         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8870         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8871           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8872                                         /*Implicit=*/true);
8873         else if (const auto *VD = dyn_cast<VarDecl>(
8874                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8875                          ->getDecl()))
8876           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8877       }
8878     }
8879     // Extract device pointer clause information.
8880     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8881       for (auto L : C->component_lists())
8882         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8883   }
8884 
8885   /// Constructor for the declare mapper directive.
8886   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8887       : CurDir(&Dir), CGF(CGF) {}
8888 
8889   /// Generate code for the combined entry if we have a partially mapped struct
8890   /// and take care of the mapping flags of the arguments corresponding to
8891   /// individual struct members.
8892   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8893                          MapFlagsArrayTy &CurTypes,
8894                          const StructRangeInfoTy &PartialStruct,
8895                          const ValueDecl *VD = nullptr,
8896                          bool NotTargetParams = true) const {
8897     if (CurTypes.size() == 1 &&
8898         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8899         !PartialStruct.IsArraySection)
8900       return;
8901     Address LBAddr = PartialStruct.LowestElem.second;
8902     Address HBAddr = PartialStruct.HighestElem.second;
8903     if (PartialStruct.HasCompleteRecord) {
8904       LBAddr = PartialStruct.LB;
8905       HBAddr = PartialStruct.LB;
8906     }
8907     CombinedInfo.Exprs.push_back(VD);
8908     // Base is the base of the struct
8909     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8910     // Pointer is the address of the lowest element
8911     llvm::Value *LB = LBAddr.getPointer();
8912     CombinedInfo.Pointers.push_back(LB);
8913     // There should not be a mapper for a combined entry.
8914     CombinedInfo.Mappers.push_back(nullptr);
8915     // Size is (addr of {highest+1} element) - (addr of lowest element)
8916     llvm::Value *HB = HBAddr.getPointer();
8917     llvm::Value *HAddr =
8918         CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
8919     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8920     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8921     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8922     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8923                                                   /*isSigned=*/false);
8924     CombinedInfo.Sizes.push_back(Size);
8925     // Map type is always TARGET_PARAM, if generate info for captures.
8926     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8927                                                  : OMP_MAP_TARGET_PARAM);
8928     // If any element has the present modifier, then make sure the runtime
8929     // doesn't attempt to allocate the struct.
8930     if (CurTypes.end() !=
8931         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8932           return Type & OMP_MAP_PRESENT;
8933         }))
8934       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8935     // Remove TARGET_PARAM flag from the first element
8936     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8937     // If any element has the ompx_hold modifier, then make sure the runtime
8938     // uses the hold reference count for the struct as a whole so that it won't
8939     // be unmapped by an extra dynamic reference count decrement.  Add it to all
8940     // elements as well so the runtime knows which reference count to check
8941     // when determining whether it's time for device-to-host transfers of
8942     // individual elements.
8943     if (CurTypes.end() !=
8944         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8945           return Type & OMP_MAP_OMPX_HOLD;
8946         })) {
8947       CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
8948       for (auto &M : CurTypes)
8949         M |= OMP_MAP_OMPX_HOLD;
8950     }
8951 
8952     // All other current entries will be MEMBER_OF the combined entry
8953     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8954     // 0xFFFF in the MEMBER_OF field).
8955     OpenMPOffloadMappingFlags MemberOfFlag =
8956         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8957     for (auto &M : CurTypes)
8958       setCorrectMemberOfFlag(M, MemberOfFlag);
8959   }
8960 
8961   /// Generate all the base pointers, section pointers, sizes, map types, and
8962   /// mappers for the extracted mappable expressions (all included in \a
8963   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8964   /// pair of the relevant declaration and index where it occurs is appended to
8965   /// the device pointers info array.
8966   void generateAllInfo(
8967       MapCombinedInfoTy &CombinedInfo,
8968       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8969           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8970     assert(CurDir.is<const OMPExecutableDirective *>() &&
8971            "Expect a executable directive");
8972     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8973     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8974   }
8975 
8976   /// Generate all the base pointers, section pointers, sizes, map types, and
8977   /// mappers for the extracted map clauses of user-defined mapper (all included
8978   /// in \a CombinedInfo).
8979   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8980     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8981            "Expect a declare mapper directive");
8982     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8983     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8984   }
8985 
8986   /// Emit capture info for lambdas for variables captured by reference.
8987   void generateInfoForLambdaCaptures(
8988       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8989       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8990     const auto *RD = VD->getType()
8991                          .getCanonicalType()
8992                          .getNonReferenceType()
8993                          ->getAsCXXRecordDecl();
8994     if (!RD || !RD->isLambda())
8995       return;
8996     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8997     LValue VDLVal = CGF.MakeAddrLValue(
8998         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8999     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
9000     FieldDecl *ThisCapture = nullptr;
9001     RD->getCaptureFields(Captures, ThisCapture);
9002     if (ThisCapture) {
9003       LValue ThisLVal =
9004           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9005       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9006       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9007                                  VDLVal.getPointer(CGF));
9008       CombinedInfo.Exprs.push_back(VD);
9009       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9010       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9011       CombinedInfo.Sizes.push_back(
9012           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9013                                     CGF.Int64Ty, /*isSigned=*/true));
9014       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9015                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9016       CombinedInfo.Mappers.push_back(nullptr);
9017     }
9018     for (const LambdaCapture &LC : RD->captures()) {
9019       if (!LC.capturesVariable())
9020         continue;
9021       const VarDecl *VD = LC.getCapturedVar();
9022       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9023         continue;
9024       auto It = Captures.find(VD);
9025       assert(It != Captures.end() && "Found lambda capture without field.");
9026       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9027       if (LC.getCaptureKind() == LCK_ByRef) {
9028         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9029         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9030                                    VDLVal.getPointer(CGF));
9031         CombinedInfo.Exprs.push_back(VD);
9032         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9033         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9034         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9035             CGF.getTypeSize(
9036                 VD->getType().getCanonicalType().getNonReferenceType()),
9037             CGF.Int64Ty, /*isSigned=*/true));
9038       } else {
9039         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9040         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9041                                    VDLVal.getPointer(CGF));
9042         CombinedInfo.Exprs.push_back(VD);
9043         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9044         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9045         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9046       }
9047       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9048                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9049       CombinedInfo.Mappers.push_back(nullptr);
9050     }
9051   }
9052 
9053   /// Set correct indices for lambdas captures.
9054   void adjustMemberOfForLambdaCaptures(
9055       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9056       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9057       MapFlagsArrayTy &Types) const {
9058     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9059       // Set correct member_of idx for all implicit lambda captures.
9060       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9061                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9062         continue;
9063       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9064       assert(BasePtr && "Unable to find base lambda address.");
9065       int TgtIdx = -1;
9066       for (unsigned J = I; J > 0; --J) {
9067         unsigned Idx = J - 1;
9068         if (Pointers[Idx] != BasePtr)
9069           continue;
9070         TgtIdx = Idx;
9071         break;
9072       }
9073       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9074       // All other current entries will be MEMBER_OF the combined entry
9075       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9076       // 0xFFFF in the MEMBER_OF field).
9077       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9078       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9079     }
9080   }
9081 
9082   /// Generate the base pointers, section pointers, sizes, map types, and
9083   /// mappers associated to a given capture (all included in \a CombinedInfo).
9084   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9085                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9086                               StructRangeInfoTy &PartialStruct) const {
9087     assert(!Cap->capturesVariableArrayType() &&
9088            "Not expecting to generate map info for a variable array type!");
9089 
9090     // We need to know when we generating information for the first component
9091     const ValueDecl *VD = Cap->capturesThis()
9092                               ? nullptr
9093                               : Cap->getCapturedVar()->getCanonicalDecl();
9094 
9095     // If this declaration appears in a is_device_ptr clause we just have to
9096     // pass the pointer by value. If it is a reference to a declaration, we just
9097     // pass its value.
9098     if (DevPointersMap.count(VD)) {
9099       CombinedInfo.Exprs.push_back(VD);
9100       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9101       CombinedInfo.Pointers.push_back(Arg);
9102       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9103           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9104           /*isSigned=*/true));
9105       CombinedInfo.Types.push_back(
9106           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9107           OMP_MAP_TARGET_PARAM);
9108       CombinedInfo.Mappers.push_back(nullptr);
9109       return;
9110     }
9111 
9112     using MapData =
9113         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9114                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9115                    const ValueDecl *, const Expr *>;
9116     SmallVector<MapData, 4> DeclComponentLists;
9117     assert(CurDir.is<const OMPExecutableDirective *>() &&
9118            "Expect a executable directive");
9119     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9120     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9121       const auto *EI = C->getVarRefs().begin();
9122       for (const auto L : C->decl_component_lists(VD)) {
9123         const ValueDecl *VDecl, *Mapper;
9124         // The Expression is not correct if the mapping is implicit
9125         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9126         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9127         std::tie(VDecl, Components, Mapper) = L;
9128         assert(VDecl == VD && "We got information for the wrong declaration??");
9129         assert(!Components.empty() &&
9130                "Not expecting declaration with no component lists.");
9131         DeclComponentLists.emplace_back(Components, C->getMapType(),
9132                                         C->getMapTypeModifiers(),
9133                                         C->isImplicit(), Mapper, E);
9134         ++EI;
9135       }
9136     }
9137     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9138                                              const MapData &RHS) {
9139       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9140       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9141       bool HasPresent = !MapModifiers.empty() &&
9142                         llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
9143                           return K == clang::OMPC_MAP_MODIFIER_present;
9144                         });
9145       bool HasAllocs = MapType == OMPC_MAP_alloc;
9146       MapModifiers = std::get<2>(RHS);
9147       MapType = std::get<1>(LHS);
9148       bool HasPresentR =
9149           !MapModifiers.empty() &&
9150           llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
9151             return K == clang::OMPC_MAP_MODIFIER_present;
9152           });
9153       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9154       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9155     });
9156 
9157     // Find overlapping elements (including the offset from the base element).
9158     llvm::SmallDenseMap<
9159         const MapData *,
9160         llvm::SmallVector<
9161             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9162         4>
9163         OverlappedData;
9164     size_t Count = 0;
9165     for (const MapData &L : DeclComponentLists) {
9166       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9167       OpenMPMapClauseKind MapType;
9168       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9169       bool IsImplicit;
9170       const ValueDecl *Mapper;
9171       const Expr *VarRef;
9172       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9173           L;
9174       ++Count;
9175       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9176         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9177         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9178                  VarRef) = L1;
9179         auto CI = Components.rbegin();
9180         auto CE = Components.rend();
9181         auto SI = Components1.rbegin();
9182         auto SE = Components1.rend();
9183         for (; CI != CE && SI != SE; ++CI, ++SI) {
9184           if (CI->getAssociatedExpression()->getStmtClass() !=
9185               SI->getAssociatedExpression()->getStmtClass())
9186             break;
9187           // Are we dealing with different variables/fields?
9188           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9189             break;
9190         }
9191         // Found overlapping if, at least for one component, reached the head
9192         // of the components list.
9193         if (CI == CE || SI == SE) {
9194           // Ignore it if it is the same component.
9195           if (CI == CE && SI == SE)
9196             continue;
9197           const auto It = (SI == SE) ? CI : SI;
9198           // If one component is a pointer and another one is a kind of
9199           // dereference of this pointer (array subscript, section, dereference,
9200           // etc.), it is not an overlapping.
9201           // Same, if one component is a base and another component is a
9202           // dereferenced pointer memberexpr with the same base.
9203           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9204               (std::prev(It)->getAssociatedDeclaration() &&
9205                std::prev(It)
9206                    ->getAssociatedDeclaration()
9207                    ->getType()
9208                    ->isPointerType()) ||
9209               (It->getAssociatedDeclaration() &&
9210                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9211                std::next(It) != CE && std::next(It) != SE))
9212             continue;
9213           const MapData &BaseData = CI == CE ? L : L1;
9214           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9215               SI == SE ? Components : Components1;
9216           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9217           OverlappedElements.getSecond().push_back(SubData);
9218         }
9219       }
9220     }
9221     // Sort the overlapped elements for each item.
9222     llvm::SmallVector<const FieldDecl *, 4> Layout;
9223     if (!OverlappedData.empty()) {
9224       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9225       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9226       while (BaseType != OrigType) {
9227         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9228         OrigType = BaseType->getPointeeOrArrayElementType();
9229       }
9230 
9231       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9232         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9233       else {
9234         const auto *RD = BaseType->getAsRecordDecl();
9235         Layout.append(RD->field_begin(), RD->field_end());
9236       }
9237     }
9238     for (auto &Pair : OverlappedData) {
9239       llvm::stable_sort(
9240           Pair.getSecond(),
9241           [&Layout](
9242               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9243               OMPClauseMappableExprCommon::MappableExprComponentListRef
9244                   Second) {
9245             auto CI = First.rbegin();
9246             auto CE = First.rend();
9247             auto SI = Second.rbegin();
9248             auto SE = Second.rend();
9249             for (; CI != CE && SI != SE; ++CI, ++SI) {
9250               if (CI->getAssociatedExpression()->getStmtClass() !=
9251                   SI->getAssociatedExpression()->getStmtClass())
9252                 break;
9253               // Are we dealing with different variables/fields?
9254               if (CI->getAssociatedDeclaration() !=
9255                   SI->getAssociatedDeclaration())
9256                 break;
9257             }
9258 
9259             // Lists contain the same elements.
9260             if (CI == CE && SI == SE)
9261               return false;
9262 
9263             // List with less elements is less than list with more elements.
9264             if (CI == CE || SI == SE)
9265               return CI == CE;
9266 
9267             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9268             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9269             if (FD1->getParent() == FD2->getParent())
9270               return FD1->getFieldIndex() < FD2->getFieldIndex();
9271             const auto *It =
9272                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9273                   return FD == FD1 || FD == FD2;
9274                 });
9275             return *It == FD1;
9276           });
9277     }
9278 
9279     // Associated with a capture, because the mapping flags depend on it.
9280     // Go through all of the elements with the overlapped elements.
9281     bool IsFirstComponentList = true;
9282     for (const auto &Pair : OverlappedData) {
9283       const MapData &L = *Pair.getFirst();
9284       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9285       OpenMPMapClauseKind MapType;
9286       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9287       bool IsImplicit;
9288       const ValueDecl *Mapper;
9289       const Expr *VarRef;
9290       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9291           L;
9292       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9293           OverlappedComponents = Pair.getSecond();
9294       generateInfoForComponentList(
9295           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9296           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9297           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9298       IsFirstComponentList = false;
9299     }
9300     // Go through other elements without overlapped elements.
9301     for (const MapData &L : DeclComponentLists) {
9302       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9303       OpenMPMapClauseKind MapType;
9304       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9305       bool IsImplicit;
9306       const ValueDecl *Mapper;
9307       const Expr *VarRef;
9308       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9309           L;
9310       auto It = OverlappedData.find(&L);
9311       if (It == OverlappedData.end())
9312         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9313                                      Components, CombinedInfo, PartialStruct,
9314                                      IsFirstComponentList, IsImplicit, Mapper,
9315                                      /*ForDeviceAddr=*/false, VD, VarRef);
9316       IsFirstComponentList = false;
9317     }
9318   }
9319 
9320   /// Generate the default map information for a given capture \a CI,
9321   /// record field declaration \a RI and captured value \a CV.
9322   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9323                               const FieldDecl &RI, llvm::Value *CV,
9324                               MapCombinedInfoTy &CombinedInfo) const {
9325     bool IsImplicit = true;
9326     // Do the default mapping.
9327     if (CI.capturesThis()) {
9328       CombinedInfo.Exprs.push_back(nullptr);
9329       CombinedInfo.BasePointers.push_back(CV);
9330       CombinedInfo.Pointers.push_back(CV);
9331       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9332       CombinedInfo.Sizes.push_back(
9333           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9334                                     CGF.Int64Ty, /*isSigned=*/true));
9335       // Default map type.
9336       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9337     } else if (CI.capturesVariableByCopy()) {
9338       const VarDecl *VD = CI.getCapturedVar();
9339       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9340       CombinedInfo.BasePointers.push_back(CV);
9341       CombinedInfo.Pointers.push_back(CV);
9342       if (!RI.getType()->isAnyPointerType()) {
9343         // We have to signal to the runtime captures passed by value that are
9344         // not pointers.
9345         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9346         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9347             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9348       } else {
9349         // Pointers are implicitly mapped with a zero size and no flags
9350         // (other than first map that is added for all implicit maps).
9351         CombinedInfo.Types.push_back(OMP_MAP_NONE);
9352         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9353       }
9354       auto I = FirstPrivateDecls.find(VD);
9355       if (I != FirstPrivateDecls.end())
9356         IsImplicit = I->getSecond();
9357     } else {
9358       assert(CI.capturesVariable() && "Expected captured reference.");
9359       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9360       QualType ElementType = PtrTy->getPointeeType();
9361       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9362           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9363       // The default map type for a scalar/complex type is 'to' because by
9364       // default the value doesn't have to be retrieved. For an aggregate
9365       // type, the default is 'tofrom'.
9366       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9367       const VarDecl *VD = CI.getCapturedVar();
9368       auto I = FirstPrivateDecls.find(VD);
9369       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9370       CombinedInfo.BasePointers.push_back(CV);
9371       if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9372         Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9373             CV, ElementType, CGF.getContext().getDeclAlign(VD),
9374             AlignmentSource::Decl));
9375         CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9376       } else {
9377         CombinedInfo.Pointers.push_back(CV);
9378       }
9379       if (I != FirstPrivateDecls.end())
9380         IsImplicit = I->getSecond();
9381     }
9382     // Every default map produces a single argument which is a target parameter.
9383     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9384 
9385     // Add flag stating this is an implicit map.
9386     if (IsImplicit)
9387       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9388 
9389     // No user-defined mapper for default mapping.
9390     CombinedInfo.Mappers.push_back(nullptr);
9391   }
9392 };
9393 } // anonymous namespace
9394 
9395 static void emitNonContiguousDescriptor(
9396     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9397     CGOpenMPRuntime::TargetDataInfo &Info) {
9398   CodeGenModule &CGM = CGF.CGM;
9399   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9400       &NonContigInfo = CombinedInfo.NonContigInfo;
9401 
9402   // Build an array of struct descriptor_dim and then assign it to
9403   // offload_args.
9404   //
9405   // struct descriptor_dim {
9406   //  uint64_t offset;
9407   //  uint64_t count;
9408   //  uint64_t stride
9409   // };
9410   ASTContext &C = CGF.getContext();
9411   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9412   RecordDecl *RD;
9413   RD = C.buildImplicitRecord("descriptor_dim");
9414   RD->startDefinition();
9415   addFieldToRecordDecl(C, RD, Int64Ty);
9416   addFieldToRecordDecl(C, RD, Int64Ty);
9417   addFieldToRecordDecl(C, RD, Int64Ty);
9418   RD->completeDefinition();
9419   QualType DimTy = C.getRecordType(RD);
9420 
9421   enum { OffsetFD = 0, CountFD, StrideFD };
9422   // We need two index variable here since the size of "Dims" is the same as the
9423   // size of Components, however, the size of offset, count, and stride is equal
9424   // to the size of base declaration that is non-contiguous.
9425   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9426     // Skip emitting ir if dimension size is 1 since it cannot be
9427     // non-contiguous.
9428     if (NonContigInfo.Dims[I] == 1)
9429       continue;
9430     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9431     QualType ArrayTy =
9432         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9433     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9434     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9435       unsigned RevIdx = EE - II - 1;
9436       LValue DimsLVal = CGF.MakeAddrLValue(
9437           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9438       // Offset
9439       LValue OffsetLVal = CGF.EmitLValueForField(
9440           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9441       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9442       // Count
9443       LValue CountLVal = CGF.EmitLValueForField(
9444           DimsLVal, *std::next(RD->field_begin(), CountFD));
9445       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9446       // Stride
9447       LValue StrideLVal = CGF.EmitLValueForField(
9448           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9449       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9450     }
9451     // args[I] = &dims
9452     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9453         DimsAddr, CGM.Int8PtrTy);
9454     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9455         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9456         Info.PointersArray, 0, I);
9457     Address PAddr(P, CGF.getPointerAlign());
9458     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9459     ++L;
9460   }
9461 }
9462 
9463 // Try to extract the base declaration from a `this->x` expression if possible.
9464 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9465   if (!E)
9466     return nullptr;
9467 
9468   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9469     if (const MemberExpr *ME =
9470             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9471       return ME->getMemberDecl();
9472   return nullptr;
9473 }
9474 
9475 /// Emit a string constant containing the names of the values mapped to the
9476 /// offloading runtime library.
9477 llvm::Constant *
9478 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9479                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9480 
9481   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9482     return OMPBuilder.getOrCreateDefaultSrcLocStr();
9483 
9484   SourceLocation Loc;
9485   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9486     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9487       Loc = VD->getLocation();
9488     else
9489       Loc = MapExprs.getMapExpr()->getExprLoc();
9490   } else {
9491     Loc = MapExprs.getMapDecl()->getLocation();
9492   }
9493 
9494   std::string ExprName = "";
9495   if (MapExprs.getMapExpr()) {
9496     PrintingPolicy P(CGF.getContext().getLangOpts());
9497     llvm::raw_string_ostream OS(ExprName);
9498     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9499     OS.flush();
9500   } else {
9501     ExprName = MapExprs.getMapDecl()->getNameAsString();
9502   }
9503 
9504   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9505   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName.c_str(),
9506                                          PLoc.getLine(), PLoc.getColumn());
9507 }
9508 
9509 /// Emit the arrays used to pass the captures and map information to the
9510 /// offloading runtime library. If there is no map or capture information,
9511 /// return nullptr by reference.
9512 static void emitOffloadingArrays(
9513     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9514     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9515     bool IsNonContiguous = false) {
9516   CodeGenModule &CGM = CGF.CGM;
9517   ASTContext &Ctx = CGF.getContext();
9518 
9519   // Reset the array information.
9520   Info.clearArrayInfo();
9521   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9522 
9523   if (Info.NumberOfPtrs) {
9524     // Detect if we have any capture size requiring runtime evaluation of the
9525     // size so that a constant array could be eventually used.
9526     bool hasRuntimeEvaluationCaptureSize = false;
9527     for (llvm::Value *S : CombinedInfo.Sizes)
9528       if (!isa<llvm::Constant>(S)) {
9529         hasRuntimeEvaluationCaptureSize = true;
9530         break;
9531       }
9532 
9533     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9534     QualType PointerArrayType = Ctx.getConstantArrayType(
9535         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9536         /*IndexTypeQuals=*/0);
9537 
9538     Info.BasePointersArray =
9539         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9540     Info.PointersArray =
9541         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9542     Address MappersArray =
9543         CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9544     Info.MappersArray = MappersArray.getPointer();
9545 
9546     // If we don't have any VLA types or other types that require runtime
9547     // evaluation, we can use a constant array for the map sizes, otherwise we
9548     // need to fill up the arrays as we do for the pointers.
9549     QualType Int64Ty =
9550         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9551     if (hasRuntimeEvaluationCaptureSize) {
9552       QualType SizeArrayType = Ctx.getConstantArrayType(
9553           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9554           /*IndexTypeQuals=*/0);
9555       Info.SizesArray =
9556           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9557     } else {
9558       // We expect all the sizes to be constant, so we collect them to create
9559       // a constant array.
9560       SmallVector<llvm::Constant *, 16> ConstSizes;
9561       for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9562         if (IsNonContiguous &&
9563             (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
9564           ConstSizes.push_back(llvm::ConstantInt::get(
9565               CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
9566         } else {
9567           ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
9568         }
9569       }
9570 
9571       auto *SizesArrayInit = llvm::ConstantArray::get(
9572           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9573       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9574       auto *SizesArrayGbl = new llvm::GlobalVariable(
9575           CGM.getModule(), SizesArrayInit->getType(),
9576           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9577           SizesArrayInit, Name);
9578       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9579       Info.SizesArray = SizesArrayGbl;
9580     }
9581 
9582     // The map types are always constant so we don't need to generate code to
9583     // fill arrays. Instead, we create an array constant.
9584     SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9585     llvm::copy(CombinedInfo.Types, Mapping.begin());
9586     std::string MaptypesName =
9587         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9588     auto *MapTypesArrayGbl =
9589         OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9590     Info.MapTypesArray = MapTypesArrayGbl;
9591 
9592     // The information types are only built if there is debug information
9593     // requested.
9594     if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9595       Info.MapNamesArray = llvm::Constant::getNullValue(
9596           llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9597     } else {
9598       auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9599         return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9600       };
9601       SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9602       llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9603       std::string MapnamesName =
9604           CGM.getOpenMPRuntime().getName({"offload_mapnames"});
9605       auto *MapNamesArrayGbl =
9606           OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
9607       Info.MapNamesArray = MapNamesArrayGbl;
9608     }
9609 
9610     // If there's a present map type modifier, it must not be applied to the end
9611     // of a region, so generate a separate map type array in that case.
9612     if (Info.separateBeginEndCalls()) {
9613       bool EndMapTypesDiffer = false;
9614       for (uint64_t &Type : Mapping) {
9615         if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9616           Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9617           EndMapTypesDiffer = true;
9618         }
9619       }
9620       if (EndMapTypesDiffer) {
9621         MapTypesArrayGbl =
9622             OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9623         Info.MapTypesArrayEnd = MapTypesArrayGbl;
9624       }
9625     }
9626 
9627     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9628       llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9629       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9630           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9631           Info.BasePointersArray, 0, I);
9632       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9633           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9634       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9635       CGF.Builder.CreateStore(BPVal, BPAddr);
9636 
9637       if (Info.requiresDevicePointerInfo())
9638         if (const ValueDecl *DevVD =
9639                 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9640           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9641 
9642       llvm::Value *PVal = CombinedInfo.Pointers[I];
9643       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9644           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9645           Info.PointersArray, 0, I);
9646       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9647           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9648       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9649       CGF.Builder.CreateStore(PVal, PAddr);
9650 
9651       if (hasRuntimeEvaluationCaptureSize) {
9652         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9653             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9654             Info.SizesArray,
9655             /*Idx0=*/0,
9656             /*Idx1=*/I);
9657         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
9658         CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9659                                                           CGM.Int64Ty,
9660                                                           /*isSigned=*/true),
9661                                 SAddr);
9662       }
9663 
9664       // Fill up the mapper array.
9665       llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9666       if (CombinedInfo.Mappers[I]) {
9667         MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9668             cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9669         MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9670         Info.HasMapper = true;
9671       }
9672       Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9673       CGF.Builder.CreateStore(MFunc, MAddr);
9674     }
9675   }
9676 
9677   if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9678       Info.NumberOfPtrs == 0)
9679     return;
9680 
9681   emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9682 }
9683 
namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// Whether the arguments are being emitted for a region-end runtime call.
  /// False unless a value is passed to the constructor.
  bool ForEndCall;

  /// Intentionally not explicit so a bare bool or a braced `{bool}` converts
  /// to ArgumentsOptions; with no argument ForEndCall is false.
  ArgumentsOptions(bool ForEndCall = false) : ForEndCall(ForEndCall) {}
};
} // namespace
9692 
9693 /// Emit the arguments to be passed to the runtime library based on the
9694 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9695 /// ForEndCall, emit map types to be passed for the end of the region instead of
9696 /// the beginning.
9697 static void emitOffloadingArraysArgument(
9698     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9699     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9700     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9701     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9702     const ArgumentsOptions &Options = ArgumentsOptions()) {
9703   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9704          "expected region end call to runtime only when end call is separate");
9705   CodeGenModule &CGM = CGF.CGM;
9706   if (Info.NumberOfPtrs) {
9707     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9708         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9709         Info.BasePointersArray,
9710         /*Idx0=*/0, /*Idx1=*/0);
9711     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9712         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9713         Info.PointersArray,
9714         /*Idx0=*/0,
9715         /*Idx1=*/0);
9716     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9717         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9718         /*Idx0=*/0, /*Idx1=*/0);
9719     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9720         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9721         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9722                                                     : Info.MapTypesArray,
9723         /*Idx0=*/0,
9724         /*Idx1=*/0);
9725 
9726     // Only emit the mapper information arrays if debug information is
9727     // requested.
9728     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9729       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9730     else
9731       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9732           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9733           Info.MapNamesArray,
9734           /*Idx0=*/0,
9735           /*Idx1=*/0);
9736     // If there is no user-defined mapper, set the mapper array to nullptr to
9737     // avoid an unnecessary data privatization
9738     if (!Info.HasMapper)
9739       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9740     else
9741       MappersArrayArg =
9742           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9743   } else {
9744     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9745     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9746     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9747     MapTypesArrayArg =
9748         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9749     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9750     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9751   }
9752 }
9753 
9754 /// Check for inner distribute directive.
9755 static const OMPExecutableDirective *
9756 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9757   const auto *CS = D.getInnermostCapturedStmt();
9758   const auto *Body =
9759       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9760   const Stmt *ChildStmt =
9761       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9762 
9763   if (const auto *NestedDir =
9764           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9765     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9766     switch (D.getDirectiveKind()) {
9767     case OMPD_target:
9768       if (isOpenMPDistributeDirective(DKind))
9769         return NestedDir;
9770       if (DKind == OMPD_teams) {
9771         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9772             /*IgnoreCaptured=*/true);
9773         if (!Body)
9774           return nullptr;
9775         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9776         if (const auto *NND =
9777                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9778           DKind = NND->getDirectiveKind();
9779           if (isOpenMPDistributeDirective(DKind))
9780             return NND;
9781         }
9782       }
9783       return nullptr;
9784     case OMPD_target_teams:
9785       if (isOpenMPDistributeDirective(DKind))
9786         return NestedDir;
9787       return nullptr;
9788     case OMPD_target_parallel:
9789     case OMPD_target_simd:
9790     case OMPD_target_parallel_for:
9791     case OMPD_target_parallel_for_simd:
9792       return nullptr;
9793     case OMPD_target_teams_distribute:
9794     case OMPD_target_teams_distribute_simd:
9795     case OMPD_target_teams_distribute_parallel_for:
9796     case OMPD_target_teams_distribute_parallel_for_simd:
9797     case OMPD_parallel:
9798     case OMPD_for:
9799     case OMPD_parallel_for:
9800     case OMPD_parallel_master:
9801     case OMPD_parallel_sections:
9802     case OMPD_for_simd:
9803     case OMPD_parallel_for_simd:
9804     case OMPD_cancel:
9805     case OMPD_cancellation_point:
9806     case OMPD_ordered:
9807     case OMPD_threadprivate:
9808     case OMPD_allocate:
9809     case OMPD_task:
9810     case OMPD_simd:
9811     case OMPD_tile:
9812     case OMPD_unroll:
9813     case OMPD_sections:
9814     case OMPD_section:
9815     case OMPD_single:
9816     case OMPD_master:
9817     case OMPD_critical:
9818     case OMPD_taskyield:
9819     case OMPD_barrier:
9820     case OMPD_taskwait:
9821     case OMPD_taskgroup:
9822     case OMPD_atomic:
9823     case OMPD_flush:
9824     case OMPD_depobj:
9825     case OMPD_scan:
9826     case OMPD_teams:
9827     case OMPD_target_data:
9828     case OMPD_target_exit_data:
9829     case OMPD_target_enter_data:
9830     case OMPD_distribute:
9831     case OMPD_distribute_simd:
9832     case OMPD_distribute_parallel_for:
9833     case OMPD_distribute_parallel_for_simd:
9834     case OMPD_teams_distribute:
9835     case OMPD_teams_distribute_simd:
9836     case OMPD_teams_distribute_parallel_for:
9837     case OMPD_teams_distribute_parallel_for_simd:
9838     case OMPD_target_update:
9839     case OMPD_declare_simd:
9840     case OMPD_declare_variant:
9841     case OMPD_begin_declare_variant:
9842     case OMPD_end_declare_variant:
9843     case OMPD_declare_target:
9844     case OMPD_end_declare_target:
9845     case OMPD_declare_reduction:
9846     case OMPD_declare_mapper:
9847     case OMPD_taskloop:
9848     case OMPD_taskloop_simd:
9849     case OMPD_master_taskloop:
9850     case OMPD_master_taskloop_simd:
9851     case OMPD_parallel_master_taskloop:
9852     case OMPD_parallel_master_taskloop_simd:
9853     case OMPD_requires:
9854     case OMPD_unknown:
9855     default:
9856       llvm_unreachable("Unexpected directive.");
9857     }
9858   }
9859 
9860   return nullptr;
9861 }
9862 
9863 /// Emit the user-defined mapper function. The code generation follows the
9864 /// pattern in the example below.
9865 /// \code
9866 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9867 ///                                           void *base, void *begin,
9868 ///                                           int64_t size, int64_t type,
9869 ///                                           void *name = nullptr) {
9870 ///   // Allocate space for an array section first or add a base/begin for
9871 ///   // pointer dereference.
9872 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9873 ///       !maptype.IsDelete)
9874 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9875 ///                                 size*sizeof(Ty), clearToFromMember(type));
9876 ///   // Map members.
9877 ///   for (unsigned i = 0; i < size; i++) {
9878 ///     // For each component specified by this mapper:
9879 ///     for (auto c : begin[i]->all_components) {
9880 ///       if (c.hasMapper())
9881 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9882 ///                       c.arg_type, c.arg_name);
9883 ///       else
9884 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9885 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9886 ///                                     c.arg_name);
9887 ///     }
9888 ///   }
9889 ///   // Delete the array section.
9890 ///   if (size > 1 && maptype.IsDelete)
9891 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9892 ///                                 size*sizeof(Ty), clearToFromMember(type));
9893 /// }
9894 /// \endcode
9895 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9896                                             CodeGenFunction *CGF) {
9897   if (UDMMap.count(D) > 0)
9898     return;
9899   ASTContext &C = CGM.getContext();
9900   QualType Ty = D->getType();
9901   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9902   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9903   auto *MapperVarDecl =
9904       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9905   SourceLocation Loc = D->getLocation();
9906   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9907 
9908   // Prepare mapper function arguments and attributes.
9909   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9910                               C.VoidPtrTy, ImplicitParamDecl::Other);
9911   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9912                             ImplicitParamDecl::Other);
9913   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9914                              C.VoidPtrTy, ImplicitParamDecl::Other);
9915   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9916                             ImplicitParamDecl::Other);
9917   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9918                             ImplicitParamDecl::Other);
9919   ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9920                             ImplicitParamDecl::Other);
9921   FunctionArgList Args;
9922   Args.push_back(&HandleArg);
9923   Args.push_back(&BaseArg);
9924   Args.push_back(&BeginArg);
9925   Args.push_back(&SizeArg);
9926   Args.push_back(&TypeArg);
9927   Args.push_back(&NameArg);
9928   const CGFunctionInfo &FnInfo =
9929       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9930   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9931   SmallString<64> TyStr;
9932   llvm::raw_svector_ostream Out(TyStr);
9933   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9934   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9935   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9936                                     Name, &CGM.getModule());
9937   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9938   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9939   // Start the mapper function code generation.
9940   CodeGenFunction MapperCGF(CGM);
9941   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9942   // Compute the starting and end addresses of array elements.
9943   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9944       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9945       C.getPointerType(Int64Ty), Loc);
9946   // Prepare common arguments for array initiation and deletion.
9947   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9948       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9949       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9950   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9951       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9952       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9953   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9954       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9955       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9956   // Convert the size in bytes into the number of array elements.
9957   Size = MapperCGF.Builder.CreateExactUDiv(
9958       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9959   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9960       BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9961   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
9962       PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
9963   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9964       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9965       C.getPointerType(Int64Ty), Loc);
9966   llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9967       MapperCGF.GetAddrOfLocalVar(&NameArg),
9968       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9969 
9970   // Emit array initiation if this is an array section and \p MapType indicates
9971   // that memory allocation is required.
9972   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9973   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9974                              MapName, ElementSize, HeadBB, /*IsInit=*/true);
9975 
9976   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9977 
9978   // Emit the loop header block.
9979   MapperCGF.EmitBlock(HeadBB);
9980   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9981   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9982   // Evaluate whether the initial condition is satisfied.
9983   llvm::Value *IsEmpty =
9984       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9985   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9986   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9987 
9988   // Emit the loop body block.
9989   MapperCGF.EmitBlock(BodyBB);
9990   llvm::BasicBlock *LastBB = BodyBB;
9991   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9992       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9993   PtrPHI->addIncoming(PtrBegin, EntryBB);
9994   Address PtrCurrent =
9995       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9996                           .getAlignment()
9997                           .alignmentOfArrayElement(ElementSize));
9998   // Privatize the declared variable of mapper to be the current array element.
9999   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
10000   Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
10001   (void)Scope.Privatize();
10002 
10003   // Get map clause information. Fill up the arrays with all mapped variables.
10004   MappableExprsHandler::MapCombinedInfoTy Info;
10005   MappableExprsHandler MEHandler(*D, MapperCGF);
10006   MEHandler.generateAllInfoForMapper(Info);
10007 
10008   // Call the runtime API __tgt_mapper_num_components to get the number of
10009   // pre-existing components.
10010   llvm::Value *OffloadingArgs[] = {Handle};
10011   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
10012       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10013                                             OMPRTL___tgt_mapper_num_components),
10014       OffloadingArgs);
10015   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
10016       PreviousSize,
10017       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
10018 
10019   // Fill up the runtime mapper handle for all components.
10020   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
10021     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
10022         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
10023     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
10024         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
10025     llvm::Value *CurSizeArg = Info.Sizes[I];
10026     llvm::Value *CurNameArg =
10027         (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
10028             ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
10029             : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
10030 
10031     // Extract the MEMBER_OF field from the map type.
10032     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
10033     llvm::Value *MemberMapType =
10034         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
10035 
10036     // Combine the map type inherited from user-defined mapper with that
10037     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
10038     // bits of the \a MapType, which is the input argument of the mapper
10039     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
10040     // bits of MemberMapType.
10041     // [OpenMP 5.0], 1.2.6. map-type decay.
10042     //        | alloc |  to   | from  | tofrom | release | delete
10043     // ----------------------------------------------------------
10044     // alloc  | alloc | alloc | alloc | alloc  | release | delete
10045     // to     | alloc |  to   | alloc |   to   | release | delete
10046     // from   | alloc | alloc | from  |  from  | release | delete
10047     // tofrom | alloc |  to   | from  | tofrom | release | delete
10048     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
10049         MapType,
10050         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
10051                                    MappableExprsHandler::OMP_MAP_FROM));
10052     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
10053     llvm::BasicBlock *AllocElseBB =
10054         MapperCGF.createBasicBlock("omp.type.alloc.else");
10055     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
10056     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
10057     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
10058     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
10059     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
10060     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
10061     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
10062     MapperCGF.EmitBlock(AllocBB);
10063     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
10064         MemberMapType,
10065         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10066                                      MappableExprsHandler::OMP_MAP_FROM)));
10067     MapperCGF.Builder.CreateBr(EndBB);
10068     MapperCGF.EmitBlock(AllocElseBB);
10069     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
10070         LeftToFrom,
10071         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
10072     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
10073     // In case of to, clear OMP_MAP_FROM.
10074     MapperCGF.EmitBlock(ToBB);
10075     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
10076         MemberMapType,
10077         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
10078     MapperCGF.Builder.CreateBr(EndBB);
10079     MapperCGF.EmitBlock(ToElseBB);
10080     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
10081         LeftToFrom,
10082         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
10083     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
10084     // In case of from, clear OMP_MAP_TO.
10085     MapperCGF.EmitBlock(FromBB);
10086     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
10087         MemberMapType,
10088         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
10089     // In case of tofrom, do nothing.
10090     MapperCGF.EmitBlock(EndBB);
10091     LastBB = EndBB;
10092     llvm::PHINode *CurMapType =
10093         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
10094     CurMapType->addIncoming(AllocMapType, AllocBB);
10095     CurMapType->addIncoming(ToMapType, ToBB);
10096     CurMapType->addIncoming(FromMapType, FromBB);
10097     CurMapType->addIncoming(MemberMapType, ToElseBB);
10098 
10099     llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
10100                                      CurSizeArg, CurMapType, CurNameArg};
10101     if (Info.Mappers[I]) {
10102       // Call the corresponding mapper function.
10103       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
10104           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
10105       assert(MapperFunc && "Expect a valid mapper function is available.");
10106       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
10107     } else {
10108       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10109       // data structure.
10110       MapperCGF.EmitRuntimeCall(
10111           OMPBuilder.getOrCreateRuntimeFunction(
10112               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
10113           OffloadingArgs);
10114     }
10115   }
10116 
10117   // Update the pointer to point to the next element that needs to be mapped,
10118   // and check whether we have mapped all elements.
10119   llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
10120   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
10121       ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
10122   PtrPHI->addIncoming(PtrNext, LastBB);
10123   llvm::Value *IsDone =
10124       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
10125   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
10126   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10127 
10128   MapperCGF.EmitBlock(ExitBB);
10129   // Emit array deletion if this is an array section and \p MapType indicates
10130   // that deletion is required.
10131   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
10132                              MapName, ElementSize, DoneBB, /*IsInit=*/false);
10133 
10134   // Emit the function exit block.
10135   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
10136   MapperCGF.FinishFunction();
10137   UDMMap.try_emplace(D, Fn);
10138   if (CGF) {
10139     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
10140     Decls.second.push_back(D);
10141   }
10142 }
10143 
10144 /// Emit the array initialization or deletion portion for user-defined mapper
10145 /// code generation. First, it evaluates whether an array section is mapped and
10146 /// whether the \a MapType instructs to delete this section. If \a IsInit is
10147 /// true, and \a MapType indicates to not delete this array, array
10148 /// initialization code is generated. If \a IsInit is false, and \a MapType
10149 /// indicates to not this array, array deletion code is generated.
10150 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
10151     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
10152     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
10153     llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
10154     bool IsInit) {
10155   StringRef Prefix = IsInit ? ".init" : ".del";
10156 
10157   // Evaluate if this is an array section.
10158   llvm::BasicBlock *BodyBB =
10159       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
10160   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
10161       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
10162   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
10163       MapType,
10164       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
10165   llvm::Value *DeleteCond;
10166   llvm::Value *Cond;
10167   if (IsInit) {
10168     // base != begin?
10169     llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
10170         MapperCGF.Builder.CreatePtrDiff(Base, Begin));
10171     // IsPtrAndObj?
10172     llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
10173         MapType,
10174         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
10175     PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
10176     BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
10177     Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
10178     DeleteCond = MapperCGF.Builder.CreateIsNull(
10179         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10180   } else {
10181     Cond = IsArray;
10182     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
10183         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10184   }
10185   Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
10186   MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
10187 
10188   MapperCGF.EmitBlock(BodyBB);
10189   // Get the array size by multiplying element size and element number (i.e., \p
10190   // Size).
10191   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
10192       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
10193   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
10194   // memory allocation/deletion purpose only.
10195   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
10196       MapType,
10197       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10198                                    MappableExprsHandler::OMP_MAP_FROM)));
10199   MapTypeArg = MapperCGF.Builder.CreateOr(
10200       MapTypeArg,
10201       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));
10202 
10203   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10204   // data structure.
10205   llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
10206                                    ArraySize, MapTypeArg, MapName};
10207   MapperCGF.EmitRuntimeCall(
10208       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10209                                             OMPRTL___tgt_push_mapper_component),
10210       OffloadingArgs);
10211 }
10212 
10213 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10214     const OMPDeclareMapperDecl *D) {
10215   auto I = UDMMap.find(D);
10216   if (I != UDMMap.end())
10217     return I->second;
10218   emitUserDefinedMapper(D);
10219   return UDMMap.lookup(D);
10220 }
10221 
10222 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10223     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10224     llvm::Value *DeviceID,
10225     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10226                                      const OMPLoopDirective &D)>
10227         SizeEmitter) {
10228   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10229   const OMPExecutableDirective *TD = &D;
10230   // Get nested teams distribute kind directive, if any.
10231   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10232     TD = getNestedDistributeDirective(CGM.getContext(), D);
10233   if (!TD)
10234     return;
10235   const auto *LD = cast<OMPLoopDirective>(TD);
10236   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10237                                                          PrePostActionTy &) {
10238     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10239       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10240       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10241       CGF.EmitRuntimeCall(
10242           OMPBuilder.getOrCreateRuntimeFunction(
10243               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10244           Args);
10245     }
10246   };
10247   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10248 }
10249 
/// Emit the host-side code that launches the target region of directive \a D.
///
/// When \a OutlinedFnID is non-null, the offloading path is generated: map
/// information for the captures of the 'target' captured statement and for the
/// map clauses is collected, the offloading arrays are emitted, and one of the
/// __tgt_target*_mapper runtime entry points is called. If that call returns a
/// non-zero value, the host version \a OutlinedFn is called instead. When
/// \a OutlinedFnID is null, only the host call to \a OutlinedFn is emitted.
/// If \a IfCond is given together with a non-null \a OutlinedFnID, the choice
/// between the offloading path and the host path goes through emitIfClause.
///
/// Nothing is emitted if \a CGF has no insertion point.
///
/// \param CGF          Code generation state of the enclosing function.
/// \param D            The target directive being emitted.
/// \param OutlinedFn   Host version of the target region; must be non-null.
/// \param OutlinedFnID Value identifying the target region to the runtime, or
///                     null if offloading does not have to be supported.
/// \param IfCond       Condition of the 'if' clause, or null.
/// \param Device       Expression of the 'device' clause (may be null) paired
///                     with its modifier. With the 'ancestor' modifier the
///                     region is executed on the host.
/// \param SizeEmitter  Callback forwarded to emitTargetNumIterationsCall to
///                     emit the trip count of the associated loop.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // With a 'depend' or 'nowait' clause the region is emitted through
  // EmitOMPTargetTaskBasedDirective (see TargetThenGen/TargetElseGen below)
  // instead of being emitted inline.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  // Values of the variables captured by the 'target' region. Shared by
  // reference with the lambdas below.
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // Filled in by TargetThenGen and consumed by ThenGen.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        // Regenerate the captured values with the CGF this lambda runs in.
        // NOTE(review): presumably needed because the task-based path emits
        // this code in a different function than the one in which
        // CapturedVars was first filled — confirm.
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      // The device id is passed to the runtime as a signed 64-bit integer.
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause expression: pass OMP_DEVICEID_UNDEF.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      // Arguments of __tgt_target_teams[_nowait]_mapper.
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer(),
          NumTeams,
          NumThreads};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      // Arguments of __tgt_target[_nowait]_mapper: same as above, but without
      // the number of teams and threads.
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer()};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    // A non-zero return value of the runtime call selects the host fallback.
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // Regenerate the captured values before calling the host version (same
      // as in the 'ancestor' case above).
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Offloading path: collect the map information, emit the offloading arrays,
  // publish them through InputInfo/MapTypesArray/MapNamesArray and then run
  // ThenGen, either inside a task or inline.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk the captures, the fields of the captured record and the captured
    // values in lock step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        // Record the capture as handled; a capture of 'this' is recorded as a
        // null declaration.
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Publish the emitted arrays to the state shared with ThenGen.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen, either inside a task or inline.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      // No offloading arrays are needed here, so a default-constructed
      // InputInfo is passed.
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
10577 
10578 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10579                                                     StringRef ParentName) {
10580   if (!S)
10581     return;
10582 
10583   // Codegen OMP target directives that offload compute to the device.
10584   bool RequiresDeviceCodegen =
10585       isa<OMPExecutableDirective>(S) &&
10586       isOpenMPTargetExecutionDirective(
10587           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10588 
10589   if (RequiresDeviceCodegen) {
10590     const auto &E = *cast<OMPExecutableDirective>(S);
10591     unsigned DeviceID;
10592     unsigned FileID;
10593     unsigned Line;
10594     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10595                              FileID, Line);
10596 
10597     // Is this a target region that should not be emitted as an entry point? If
10598     // so just signal we are done with this target region.
10599     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10600                                                             ParentName, Line))
10601       return;
10602 
10603     switch (E.getDirectiveKind()) {
10604     case OMPD_target:
10605       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10606                                                    cast<OMPTargetDirective>(E));
10607       break;
10608     case OMPD_target_parallel:
10609       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10610           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10611       break;
10612     case OMPD_target_teams:
10613       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10614           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10615       break;
10616     case OMPD_target_teams_distribute:
10617       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10618           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10619       break;
10620     case OMPD_target_teams_distribute_simd:
10621       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10622           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10623       break;
10624     case OMPD_target_parallel_for:
10625       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10626           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10627       break;
10628     case OMPD_target_parallel_for_simd:
10629       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10630           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10631       break;
10632     case OMPD_target_simd:
10633       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10634           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10635       break;
10636     case OMPD_target_teams_distribute_parallel_for:
10637       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10638           CGM, ParentName,
10639           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10640       break;
10641     case OMPD_target_teams_distribute_parallel_for_simd:
10642       CodeGenFunction::
10643           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10644               CGM, ParentName,
10645               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10646       break;
10647     case OMPD_parallel:
10648     case OMPD_for:
10649     case OMPD_parallel_for:
10650     case OMPD_parallel_master:
10651     case OMPD_parallel_sections:
10652     case OMPD_for_simd:
10653     case OMPD_parallel_for_simd:
10654     case OMPD_cancel:
10655     case OMPD_cancellation_point:
10656     case OMPD_ordered:
10657     case OMPD_threadprivate:
10658     case OMPD_allocate:
10659     case OMPD_task:
10660     case OMPD_simd:
10661     case OMPD_tile:
10662     case OMPD_unroll:
10663     case OMPD_sections:
10664     case OMPD_section:
10665     case OMPD_single:
10666     case OMPD_master:
10667     case OMPD_critical:
10668     case OMPD_taskyield:
10669     case OMPD_barrier:
10670     case OMPD_taskwait:
10671     case OMPD_taskgroup:
10672     case OMPD_atomic:
10673     case OMPD_flush:
10674     case OMPD_depobj:
10675     case OMPD_scan:
10676     case OMPD_teams:
10677     case OMPD_target_data:
10678     case OMPD_target_exit_data:
10679     case OMPD_target_enter_data:
10680     case OMPD_distribute:
10681     case OMPD_distribute_simd:
10682     case OMPD_distribute_parallel_for:
10683     case OMPD_distribute_parallel_for_simd:
10684     case OMPD_teams_distribute:
10685     case OMPD_teams_distribute_simd:
10686     case OMPD_teams_distribute_parallel_for:
10687     case OMPD_teams_distribute_parallel_for_simd:
10688     case OMPD_target_update:
10689     case OMPD_declare_simd:
10690     case OMPD_declare_variant:
10691     case OMPD_begin_declare_variant:
10692     case OMPD_end_declare_variant:
10693     case OMPD_declare_target:
10694     case OMPD_end_declare_target:
10695     case OMPD_declare_reduction:
10696     case OMPD_declare_mapper:
10697     case OMPD_taskloop:
10698     case OMPD_taskloop_simd:
10699     case OMPD_master_taskloop:
10700     case OMPD_master_taskloop_simd:
10701     case OMPD_parallel_master_taskloop:
10702     case OMPD_parallel_master_taskloop_simd:
10703     case OMPD_requires:
10704     case OMPD_unknown:
10705     default:
10706       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10707     }
10708     return;
10709   }
10710 
10711   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10712     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10713       return;
10714 
10715     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10716     return;
10717   }
10718 
10719   // If this is a lambda function, look into its body.
10720   if (const auto *L = dyn_cast<LambdaExpr>(S))
10721     S = L->getBody();
10722 
10723   // Keep looking for target regions recursively.
10724   for (const Stmt *II : S->children())
10725     scanForTargetRegionsFunctions(II, ParentName);
10726 }
10727 
10728 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10729   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10730       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10731   if (!DevTy)
10732     return false;
10733   // Do not emit device_type(nohost) functions for the host.
10734   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10735     return true;
10736   // Do not emit device_type(host) functions for the device.
10737   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10738     return true;
10739   return false;
10740 }
10741 
10742 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10743   // If emitting code for the host, we do not process FD here. Instead we do
10744   // the normal code generation.
10745   if (!CGM.getLangOpts().OpenMPIsDevice) {
10746     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10747       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10748                                   CGM.getLangOpts().OpenMPIsDevice))
10749         return true;
10750     return false;
10751   }
10752 
10753   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10754   // Try to detect target regions in the function.
10755   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10756     StringRef Name = CGM.getMangledName(GD);
10757     scanForTargetRegionsFunctions(FD->getBody(), Name);
10758     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10759                                 CGM.getLangOpts().OpenMPIsDevice))
10760       return true;
10761   }
10762 
10763   // Do not to emit function if it is not marked as declare target.
10764   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10765          AlreadyEmittedTargetDecls.count(VD) == 0;
10766 }
10767 
10768 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10769   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10770                               CGM.getLangOpts().OpenMPIsDevice))
10771     return true;
10772 
10773   if (!CGM.getLangOpts().OpenMPIsDevice)
10774     return false;
10775 
10776   // Check if there are Ctors/Dtors in this declaration and look for target
10777   // regions in it. We use the complete variant to produce the kernel name
10778   // mangling.
10779   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10780   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10781     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10782       StringRef ParentName =
10783           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10784       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10785     }
10786     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10787       StringRef ParentName =
10788           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10789       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10790     }
10791   }
10792 
10793   // Do not to emit variable if it is not marked as declare target.
10794   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10795       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10796           cast<VarDecl>(GD.getDecl()));
10797   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10798       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10799        HasRequiresUnifiedSharedMemory)) {
10800     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10801     return true;
10802   }
10803   return false;
10804 }
10805 
/// Register the declare target variable \p VD, whose emitted address is
/// \p Addr, as a device global variable entry in OffloadEntriesInfoManager.
/// Variables without a declare target mapping are not registered; when
/// compiling for the device they are only recorded in
/// EmittedNonTargetVariables.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register on the host when no offload target triples are set.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables. All four values below are assigned on
  // both branches of the following if/else before they are used.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    // 'to' mapping without unified shared memory: the entry describes the
    // variable itself (its mangled name, its size and its own linkage).
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Only a declaration is available: register with size zero.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host. Note that this returns without registering an entry.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        // Create an internal constant global initialized with Addr and add it
        // to the compiler-used list.
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    // 'link' mapping, or 'to' mapping with unified shared memory: the entry
    // is pointer-sized and uses weak linkage.
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device the entry takes its name from Addr and carries no
      // address.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      // On the host both name and address come from
      // getAddrOfDeclareTargetVar(), replacing the incoming Addr.
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
10887 
10888 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10889   if (isa<FunctionDecl>(GD.getDecl()) ||
10890       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10891     return emitTargetFunctions(GD);
10892 
10893   return emitTargetGlobalVariable(GD);
10894 }
10895 
10896 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10897   for (const VarDecl *VD : DeferredGlobalVariables) {
10898     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10899         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10900     if (!Res)
10901       continue;
10902     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10903         !HasRequiresUnifiedSharedMemory) {
10904       CGM.EmitGlobal(VD);
10905     } else {
10906       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10907               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10908                HasRequiresUnifiedSharedMemory)) &&
10909              "Expected link clause or to clause with unified memory.");
10910       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10911     }
10912   }
10913 }
10914 
10915 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10916     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10917   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10918          " Expected target-based directive.");
10919 }
10920 
10921 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10922   for (const OMPClause *Clause : D->clauselists()) {
10923     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10924       HasRequiresUnifiedSharedMemory = true;
10925     } else if (const auto *AC =
10926                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10927       switch (AC->getAtomicDefaultMemOrderKind()) {
10928       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10929         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10930         break;
10931       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10932         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10933         break;
10934       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10935         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10936         break;
10937       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10938         break;
10939       }
10940     }
10941   }
10942 }
10943 
/// Return the current value of RequiresAtomicOrdering, the ordering that
/// processRequiresDirective() updates when it sees an
/// atomic_default_mem_order clause.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10947 
10948 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10949                                                        LangAS &AS) {
10950   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10951     return false;
10952   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10953   switch(A->getAllocatorType()) {
10954   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10955   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10956   // Not supported, fallback to the default mem space.
10957   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10958   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10959   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10960   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10961   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10962   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10963   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10964     AS = LangAS::Default;
10965     return true;
10966   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10967     llvm_unreachable("Expected predefined allocator for the variables with the "
10968                      "static storage.");
10969   }
10970   return false;
10971 }
10972 
/// Return the HasRequiresUnifiedSharedMemory flag, which
/// processRequiresDirective() sets when it sees a unified_shared_memory
/// clause.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10976 
10977 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10978     CodeGenModule &CGM)
10979     : CGM(CGM) {
10980   if (CGM.getLangOpts().OpenMPIsDevice) {
10981     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10982     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10983   }
10984 }
10985 
10986 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10987   if (CGM.getLangOpts().OpenMPIsDevice)
10988     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10989 }
10990 
10991 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10992   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10993     return true;
10994 
10995   const auto *D = cast<FunctionDecl>(GD.getDecl());
10996   // Do not to emit function if it is marked as declare target as it was already
10997   // emitted.
10998   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10999     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11000       if (auto *F = dyn_cast_or_null<llvm::Function>(
11001               CGM.GetGlobalValue(CGM.getMangledName(GD))))
11002         return !F->isDeclaration();
11003       return false;
11004     }
11005     return true;
11006   }
11007 
11008   return !AlreadyEmittedTargetDecls.insert(D).second;
11009 }
11010 
11011 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
11012   // If we don't have entries or if we are emitting code for the device, we
11013   // don't need to do anything.
11014   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
11015       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
11016       (OffloadEntriesInfoManager.empty() &&
11017        !HasEmittedDeclareTargetRegion &&
11018        !HasEmittedTargetRegion))
11019     return nullptr;
11020 
11021   // Create and register the function that handles the requires directives.
11022   ASTContext &C = CGM.getContext();
11023 
11024   llvm::Function *RequiresRegFn;
11025   {
11026     CodeGenFunction CGF(CGM);
11027     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
11028     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
11029     std::string ReqName = getName({"omp_offloading", "requires_reg"});
11030     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
11031     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
11032     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
11033     // TODO: check for other requires clauses.
11034     // The requires directive takes effect only when a target region is
11035     // present in the compilation unit. Otherwise it is ignored and not
11036     // passed to the runtime. This avoids the runtime from throwing an error
11037     // for mismatching requires clauses across compilation units that don't
11038     // contain at least 1 target region.
11039     assert((HasEmittedTargetRegion ||
11040             HasEmittedDeclareTargetRegion ||
11041             !OffloadEntriesInfoManager.empty()) &&
11042            "Target or declare target region expected.");
11043     if (HasRequiresUnifiedSharedMemory)
11044       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
11045     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11046                             CGM.getModule(), OMPRTL___tgt_register_requires),
11047                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
11048     CGF.FinishFunction();
11049   }
11050   return RequiresRegFn;
11051 }
11052 
11053 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11054                                     const OMPExecutableDirective &D,
11055                                     SourceLocation Loc,
11056                                     llvm::Function *OutlinedFn,
11057                                     ArrayRef<llvm::Value *> CapturedVars) {
11058   if (!CGF.HaveInsertPoint())
11059     return;
11060 
11061   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11062   CodeGenFunction::RunCleanupsScope Scope(CGF);
11063 
11064   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11065   llvm::Value *Args[] = {
11066       RTLoc,
11067       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11068       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11069   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11070   RealArgs.append(std::begin(Args), std::end(Args));
11071   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11072 
11073   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11074       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11075   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11076 }
11077 
11078 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11079                                          const Expr *NumTeams,
11080                                          const Expr *ThreadLimit,
11081                                          SourceLocation Loc) {
11082   if (!CGF.HaveInsertPoint())
11083     return;
11084 
11085   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11086 
11087   llvm::Value *NumTeamsVal =
11088       NumTeams
11089           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11090                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11091           : CGF.Builder.getInt32(0);
11092 
11093   llvm::Value *ThreadLimitVal =
11094       ThreadLimit
11095           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11096                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11097           : CGF.Builder.getInt32(0);
11098 
11099   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11100   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11101                                      ThreadLimitVal};
11102   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11103                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11104                       PushNumTeamsArgs);
11105 }
11106 
/// Emit the code for a target data region of directive \p D: a call to
/// __tgt_target_data_begin_mapper, the region body (\p CodeGen), and a call
/// to __tgt_target_data_end_mapper. When \p IfCond is given, both runtime
/// calls are guarded by it. \p Device, if non-null, supplies the device ID;
/// otherwise OMP_DEVICEID_UNDEF is passed. \p Info receives the offloading
/// arrays built for the begin call and is reused for the end call. Nothing is
/// emitted when \p CGF has no insertion point.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any; it is passed as a signed 64-bit value.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region. This reuses the arrays
  // stored in Info by the opening code.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any; the device expression is evaluated again here.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment, conditionally when an if clause is present.
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment, conditionally when an if clause is present.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
11260 
11261 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
11262     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11263     const Expr *Device) {
11264   if (!CGF.HaveInsertPoint())
11265     return;
11266 
11267   assert((isa<OMPTargetEnterDataDirective>(D) ||
11268           isa<OMPTargetExitDataDirective>(D) ||
11269           isa<OMPTargetUpdateDirective>(D)) &&
11270          "Expecting either target enter, exit data, or update directives.");
11271 
11272   CodeGenFunction::OMPTargetDataInfo InputInfo;
11273   llvm::Value *MapTypesArray = nullptr;
11274   llvm::Value *MapNamesArray = nullptr;
11275   // Generate the code for the opening of the data environment.
11276   auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11277                     &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11278     // Emit device ID if any.
11279     llvm::Value *DeviceID = nullptr;
11280     if (Device) {
11281       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11282                                            CGF.Int64Ty, /*isSigned=*/true);
11283     } else {
11284       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11285     }
11286 
11287     // Emit the number of elements in the offloading arrays.
11288     llvm::Constant *PointerNum =
11289         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11290 
11291     // Source location for the ident struct
11292     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11293 
11294     llvm::Value *OffloadingArgs[] = {RTLoc,
11295                                      DeviceID,
11296                                      PointerNum,
11297                                      InputInfo.BasePointersArray.getPointer(),
11298                                      InputInfo.PointersArray.getPointer(),
11299                                      InputInfo.SizesArray.getPointer(),
11300                                      MapTypesArray,
11301                                      MapNamesArray,
11302                                      InputInfo.MappersArray.getPointer()};
11303 
11304     // Select the right runtime function call for each standalone
11305     // directive.
11306     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11307     RuntimeFunction RTLFn;
11308     switch (D.getDirectiveKind()) {
11309     case OMPD_target_enter_data:
11310       RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11311                         : OMPRTL___tgt_target_data_begin_mapper;
11312       break;
11313     case OMPD_target_exit_data:
11314       RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11315                         : OMPRTL___tgt_target_data_end_mapper;
11316       break;
11317     case OMPD_target_update:
11318       RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11319                         : OMPRTL___tgt_target_data_update_mapper;
11320       break;
11321     case OMPD_parallel:
11322     case OMPD_for:
11323     case OMPD_parallel_for:
11324     case OMPD_parallel_master:
11325     case OMPD_parallel_sections:
11326     case OMPD_for_simd:
11327     case OMPD_parallel_for_simd:
11328     case OMPD_cancel:
11329     case OMPD_cancellation_point:
11330     case OMPD_ordered:
11331     case OMPD_threadprivate:
11332     case OMPD_allocate:
11333     case OMPD_task:
11334     case OMPD_simd:
11335     case OMPD_tile:
11336     case OMPD_unroll:
11337     case OMPD_sections:
11338     case OMPD_section:
11339     case OMPD_single:
11340     case OMPD_master:
11341     case OMPD_critical:
11342     case OMPD_taskyield:
11343     case OMPD_barrier:
11344     case OMPD_taskwait:
11345     case OMPD_taskgroup:
11346     case OMPD_atomic:
11347     case OMPD_flush:
11348     case OMPD_depobj:
11349     case OMPD_scan:
11350     case OMPD_teams:
11351     case OMPD_target_data:
11352     case OMPD_distribute:
11353     case OMPD_distribute_simd:
11354     case OMPD_distribute_parallel_for:
11355     case OMPD_distribute_parallel_for_simd:
11356     case OMPD_teams_distribute:
11357     case OMPD_teams_distribute_simd:
11358     case OMPD_teams_distribute_parallel_for:
11359     case OMPD_teams_distribute_parallel_for_simd:
11360     case OMPD_declare_simd:
11361     case OMPD_declare_variant:
11362     case OMPD_begin_declare_variant:
11363     case OMPD_end_declare_variant:
11364     case OMPD_declare_target:
11365     case OMPD_end_declare_target:
11366     case OMPD_declare_reduction:
11367     case OMPD_declare_mapper:
11368     case OMPD_taskloop:
11369     case OMPD_taskloop_simd:
11370     case OMPD_master_taskloop:
11371     case OMPD_master_taskloop_simd:
11372     case OMPD_parallel_master_taskloop:
11373     case OMPD_parallel_master_taskloop_simd:
11374     case OMPD_target:
11375     case OMPD_target_simd:
11376     case OMPD_target_teams_distribute:
11377     case OMPD_target_teams_distribute_simd:
11378     case OMPD_target_teams_distribute_parallel_for:
11379     case OMPD_target_teams_distribute_parallel_for_simd:
11380     case OMPD_target_teams:
11381     case OMPD_target_parallel:
11382     case OMPD_target_parallel_for:
11383     case OMPD_target_parallel_for_simd:
11384     case OMPD_requires:
11385     case OMPD_unknown:
11386     default:
11387       llvm_unreachable("Unexpected standalone target data directive.");
11388       break;
11389     }
11390     CGF.EmitRuntimeCall(
11391         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11392         OffloadingArgs);
11393   };
11394 
11395   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11396                           &MapNamesArray](CodeGenFunction &CGF,
11397                                           PrePostActionTy &) {
11398     // Fill up the arrays with all the mapped variables.
11399     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11400 
11401     // Get map clause information.
11402     MappableExprsHandler MEHandler(D, CGF);
11403     MEHandler.generateAllInfo(CombinedInfo);
11404 
11405     TargetDataInfo Info;
11406     // Fill up the arrays and create the arguments.
11407     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11408                          /*IsNonContiguous=*/true);
11409     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11410                              D.hasClausesOfKind<OMPNowaitClause>();
11411     emitOffloadingArraysArgument(
11412         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
11413         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
11414         {/*ForEndTask=*/false});
11415     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11416     InputInfo.BasePointersArray =
11417         Address(Info.BasePointersArray, CGM.getPointerAlign());
11418     InputInfo.PointersArray =
11419         Address(Info.PointersArray, CGM.getPointerAlign());
11420     InputInfo.SizesArray =
11421         Address(Info.SizesArray, CGM.getPointerAlign());
11422     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
11423     MapTypesArray = Info.MapTypesArray;
11424     MapNamesArray = Info.MapNamesArray;
11425     if (RequiresOuterTask)
11426       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11427     else
11428       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11429   };
11430 
11431   if (IfCond) {
11432     emitIfClause(CGF, IfCond, TargetThenGen,
11433                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
11434   } else {
11435     RegionCodeGenTy ThenRCG(TargetThenGen);
11436     ThenRCG(CGF);
11437   }
11438 }
11439 
11440 namespace {
11441   /// Kind of parameter in a function with 'declare simd' directive.
11442   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
11443   /// Attribute set of the parameter.
11444   struct ParamAttrTy {
11445     ParamKindTy Kind = Vector;
11446     llvm::APSInt StrideOrArg;
11447     llvm::APSInt Alignment;
11448   };
11449 } // namespace
11450 
11451 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11452                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11453   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11454   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11455   // of that clause. The VLEN value must be power of 2.
11456   // In other case the notion of the function`s "characteristic data type" (CDT)
11457   // is used to compute the vector length.
11458   // CDT is defined in the following order:
11459   //   a) For non-void function, the CDT is the return type.
11460   //   b) If the function has any non-uniform, non-linear parameters, then the
11461   //   CDT is the type of the first such parameter.
11462   //   c) If the CDT determined by a) or b) above is struct, union, or class
11463   //   type which is pass-by-value (except for the type that maps to the
11464   //   built-in complex data type), the characteristic data type is int.
11465   //   d) If none of the above three cases is applicable, the CDT is int.
11466   // The VLEN is then determined based on the CDT and the size of vector
11467   // register of that ISA for which current vector version is generated. The
11468   // VLEN is computed using the formula below:
11469   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11470   // where vector register size specified in section 3.2.1 Registers and the
11471   // Stack Frame of original AMD64 ABI document.
11472   QualType RetType = FD->getReturnType();
11473   if (RetType.isNull())
11474     return 0;
11475   ASTContext &C = FD->getASTContext();
11476   QualType CDT;
11477   if (!RetType.isNull() && !RetType->isVoidType()) {
11478     CDT = RetType;
11479   } else {
11480     unsigned Offset = 0;
11481     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11482       if (ParamAttrs[Offset].Kind == Vector)
11483         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11484       ++Offset;
11485     }
11486     if (CDT.isNull()) {
11487       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11488         if (ParamAttrs[I + Offset].Kind == Vector) {
11489           CDT = FD->getParamDecl(I)->getType();
11490           break;
11491         }
11492       }
11493     }
11494   }
11495   if (CDT.isNull())
11496     CDT = C.IntTy;
11497   CDT = CDT->getCanonicalTypeUnqualified();
11498   if (CDT->isRecordType() || CDT->isUnionType())
11499     CDT = C.IntTy;
11500   return C.getTypeSize(CDT);
11501 }
11502 
11503 static void
11504 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11505                            const llvm::APSInt &VLENVal,
11506                            ArrayRef<ParamAttrTy> ParamAttrs,
11507                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11508   struct ISADataTy {
11509     char ISA;
11510     unsigned VecRegSize;
11511   };
11512   ISADataTy ISAData[] = {
11513       {
11514           'b', 128
11515       }, // SSE
11516       {
11517           'c', 256
11518       }, // AVX
11519       {
11520           'd', 256
11521       }, // AVX2
11522       {
11523           'e', 512
11524       }, // AVX512
11525   };
11526   llvm::SmallVector<char, 2> Masked;
11527   switch (State) {
11528   case OMPDeclareSimdDeclAttr::BS_Undefined:
11529     Masked.push_back('N');
11530     Masked.push_back('M');
11531     break;
11532   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11533     Masked.push_back('N');
11534     break;
11535   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11536     Masked.push_back('M');
11537     break;
11538   }
11539   for (char Mask : Masked) {
11540     for (const ISADataTy &Data : ISAData) {
11541       SmallString<256> Buffer;
11542       llvm::raw_svector_ostream Out(Buffer);
11543       Out << "_ZGV" << Data.ISA << Mask;
11544       if (!VLENVal) {
11545         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11546         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11547         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11548       } else {
11549         Out << VLENVal;
11550       }
11551       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11552         switch (ParamAttr.Kind){
11553         case LinearWithVarStride:
11554           Out << 's' << ParamAttr.StrideOrArg;
11555           break;
11556         case Linear:
11557           Out << 'l';
11558           if (ParamAttr.StrideOrArg != 1)
11559             Out << ParamAttr.StrideOrArg;
11560           break;
11561         case Uniform:
11562           Out << 'u';
11563           break;
11564         case Vector:
11565           Out << 'v';
11566           break;
11567         }
11568         if (!!ParamAttr.Alignment)
11569           Out << 'a' << ParamAttr.Alignment;
11570       }
11571       Out << '_' << Fn->getName();
11572       Fn->addFnAttr(Out.str());
11573     }
11574   }
11575 }
11576 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined in
// the "Vector Function ABI specifications for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11582 
11583 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11584 ///
11585 /// TODO: Need to implement the behavior for reference marked with a
11586 /// var or no linear modifiers (1.b in the section). For this, we
11587 /// need to extend ParamKindTy to support the linear modifiers.
11588 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11589   QT = QT.getCanonicalType();
11590 
11591   if (QT->isVoidType())
11592     return false;
11593 
11594   if (Kind == ParamKindTy::Uniform)
11595     return false;
11596 
11597   if (Kind == ParamKindTy::Linear)
11598     return false;
11599 
11600   // TODO: Handle linear references with modifiers
11601 
11602   if (Kind == ParamKindTy::LinearWithVarStride)
11603     return false;
11604 
11605   return true;
11606 }
11607 
11608 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11609 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11610   QT = QT.getCanonicalType();
11611   unsigned Size = C.getTypeSize(QT);
11612 
11613   // Only scalars and complex within 16 bytes wide set PVB to true.
11614   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11615     return false;
11616 
11617   if (QT->isFloatingType())
11618     return true;
11619 
11620   if (QT->isIntegerType())
11621     return true;
11622 
11623   if (QT->isPointerType())
11624     return true;
11625 
11626   // TODO: Add support for complex types (section 3.1.2, item 2).
11627 
11628   return false;
11629 }
11630 
11631 /// Computes the lane size (LS) of a return type or of an input parameter,
11632 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11633 /// TODO: Add support for references, section 3.2.1, item 1.
11634 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11635   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11636     QualType PTy = QT.getCanonicalType()->getPointeeType();
11637     if (getAArch64PBV(PTy, C))
11638       return C.getTypeSize(PTy);
11639   }
11640   if (getAArch64PBV(QT, C))
11641     return C.getTypeSize(QT);
11642 
11643   return C.getTypeSize(C.getUIntPtrType());
11644 }
11645 
11646 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11647 // signature of the scalar function, as defined in 3.2.2 of the
11648 // AAVFABI.
11649 static std::tuple<unsigned, unsigned, bool>
11650 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11651   QualType RetType = FD->getReturnType().getCanonicalType();
11652 
11653   ASTContext &C = FD->getASTContext();
11654 
11655   bool OutputBecomesInput = false;
11656 
11657   llvm::SmallVector<unsigned, 8> Sizes;
11658   if (!RetType->isVoidType()) {
11659     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11660     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11661       OutputBecomesInput = true;
11662   }
11663   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11664     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11665     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11666   }
11667 
11668   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11669   // The LS of a function parameter / return value can only be a power
11670   // of 2, starting from 8 bits, up to 128.
11671   assert(std::all_of(Sizes.begin(), Sizes.end(),
11672                      [](unsigned Size) {
11673                        return Size == 8 || Size == 16 || Size == 32 ||
11674                               Size == 64 || Size == 128;
11675                      }) &&
11676          "Invalid size");
11677 
11678   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11679                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11680                          OutputBecomesInput);
11681 }
11682 
11683 /// Mangle the parameter part of the vector function name according to
11684 /// their OpenMP classification. The mangling function is defined in
11685 /// section 3.5 of the AAVFABI.
11686 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11687   SmallString<256> Buffer;
11688   llvm::raw_svector_ostream Out(Buffer);
11689   for (const auto &ParamAttr : ParamAttrs) {
11690     switch (ParamAttr.Kind) {
11691     case LinearWithVarStride:
11692       Out << "ls" << ParamAttr.StrideOrArg;
11693       break;
11694     case Linear:
11695       Out << 'l';
11696       // Don't print the step value if it is not present or if it is
11697       // equal to 1.
11698       if (ParamAttr.StrideOrArg != 1)
11699         Out << ParamAttr.StrideOrArg;
11700       break;
11701     case Uniform:
11702       Out << 'u';
11703       break;
11704     case Vector:
11705       Out << 'v';
11706       break;
11707     }
11708 
11709     if (!!ParamAttr.Alignment)
11710       Out << 'a' << ParamAttr.Alignment;
11711   }
11712 
11713   return std::string(Out.str());
11714 }
11715 
11716 // Function used to add the attribute. The parameter `VLEN` is
11717 // templated to allow the use of "x" when targeting scalable functions
11718 // for SVE.
11719 template <typename T>
11720 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11721                                  char ISA, StringRef ParSeq,
11722                                  StringRef MangledName, bool OutputBecomesInput,
11723                                  llvm::Function *Fn) {
11724   SmallString<256> Buffer;
11725   llvm::raw_svector_ostream Out(Buffer);
11726   Out << Prefix << ISA << LMask << VLEN;
11727   if (OutputBecomesInput)
11728     Out << "v";
11729   Out << ParSeq << "_" << MangledName;
11730   Fn->addFnAttr(Out.str());
11731 }
11732 
11733 // Helper function to generate the Advanced SIMD names depending on
11734 // the value of the NDS when simdlen is not present.
11735 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11736                                       StringRef Prefix, char ISA,
11737                                       StringRef ParSeq, StringRef MangledName,
11738                                       bool OutputBecomesInput,
11739                                       llvm::Function *Fn) {
11740   switch (NDS) {
11741   case 8:
11742     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11743                          OutputBecomesInput, Fn);
11744     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11745                          OutputBecomesInput, Fn);
11746     break;
11747   case 16:
11748     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11749                          OutputBecomesInput, Fn);
11750     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11751                          OutputBecomesInput, Fn);
11752     break;
11753   case 32:
11754     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11755                          OutputBecomesInput, Fn);
11756     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11757                          OutputBecomesInput, Fn);
11758     break;
11759   case 64:
11760   case 128:
11761     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11762                          OutputBecomesInput, Fn);
11763     break;
11764   default:
11765     llvm_unreachable("Scalar type is too wide.");
11766   }
11767 }
11768 
11769 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11770 static void emitAArch64DeclareSimdFunction(
11771     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11772     ArrayRef<ParamAttrTy> ParamAttrs,
11773     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11774     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11775 
11776   // Get basic data for building the vector signature.
11777   const auto Data = getNDSWDS(FD, ParamAttrs);
11778   const unsigned NDS = std::get<0>(Data);
11779   const unsigned WDS = std::get<1>(Data);
11780   const bool OutputBecomesInput = std::get<2>(Data);
11781 
11782   // Check the values provided via `simdlen` by the user.
11783   // 1. A `simdlen(1)` doesn't produce vector signatures,
11784   if (UserVLEN == 1) {
11785     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11786         DiagnosticsEngine::Warning,
11787         "The clause simdlen(1) has no effect when targeting aarch64.");
11788     CGM.getDiags().Report(SLoc, DiagID);
11789     return;
11790   }
11791 
11792   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11793   // Advanced SIMD output.
11794   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11795     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11796         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11797                                     "power of 2 when targeting Advanced SIMD.");
11798     CGM.getDiags().Report(SLoc, DiagID);
11799     return;
11800   }
11801 
11802   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11803   // limits.
11804   if (ISA == 's' && UserVLEN != 0) {
11805     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11806       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11807           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11808                                       "lanes in the architectural constraints "
11809                                       "for SVE (min is 128-bit, max is "
11810                                       "2048-bit, by steps of 128-bit)");
11811       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11812       return;
11813     }
11814   }
11815 
11816   // Sort out parameter sequence.
11817   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11818   StringRef Prefix = "_ZGV";
11819   // Generate simdlen from user input (if any).
11820   if (UserVLEN) {
11821     if (ISA == 's') {
11822       // SVE generates only a masked function.
11823       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11824                            OutputBecomesInput, Fn);
11825     } else {
11826       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11827       // Advanced SIMD generates one or two functions, depending on
11828       // the `[not]inbranch` clause.
11829       switch (State) {
11830       case OMPDeclareSimdDeclAttr::BS_Undefined:
11831         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11832                              OutputBecomesInput, Fn);
11833         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11834                              OutputBecomesInput, Fn);
11835         break;
11836       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11837         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11838                              OutputBecomesInput, Fn);
11839         break;
11840       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11841         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11842                              OutputBecomesInput, Fn);
11843         break;
11844       }
11845     }
11846   } else {
11847     // If no user simdlen is provided, follow the AAVFABI rules for
11848     // generating the vector length.
11849     if (ISA == 's') {
11850       // SVE, section 3.4.1, item 1.
11851       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11852                            OutputBecomesInput, Fn);
11853     } else {
11854       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11855       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11856       // two vector names depending on the use of the clause
11857       // `[not]inbranch`.
11858       switch (State) {
11859       case OMPDeclareSimdDeclAttr::BS_Undefined:
11860         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11861                                   OutputBecomesInput, Fn);
11862         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11863                                   OutputBecomesInput, Fn);
11864         break;
11865       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11866         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11867                                   OutputBecomesInput, Fn);
11868         break;
11869       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11870         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11871                                   OutputBecomesInput, Fn);
11872         break;
11873       }
11874     }
11875   }
11876 }
11877 
11878 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11879                                               llvm::Function *Fn) {
11880   ASTContext &C = CGM.getContext();
11881   FD = FD->getMostRecentDecl();
11882   // Map params to their positions in function decl.
11883   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11884   if (isa<CXXMethodDecl>(FD))
11885     ParamPositions.try_emplace(FD, 0);
11886   unsigned ParamPos = ParamPositions.size();
11887   for (const ParmVarDecl *P : FD->parameters()) {
11888     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11889     ++ParamPos;
11890   }
11891   while (FD) {
11892     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11893       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11894       // Mark uniform parameters.
11895       for (const Expr *E : Attr->uniforms()) {
11896         E = E->IgnoreParenImpCasts();
11897         unsigned Pos;
11898         if (isa<CXXThisExpr>(E)) {
11899           Pos = ParamPositions[FD];
11900         } else {
11901           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11902                                 ->getCanonicalDecl();
11903           Pos = ParamPositions[PVD];
11904         }
11905         ParamAttrs[Pos].Kind = Uniform;
11906       }
11907       // Get alignment info.
11908       auto NI = Attr->alignments_begin();
11909       for (const Expr *E : Attr->aligneds()) {
11910         E = E->IgnoreParenImpCasts();
11911         unsigned Pos;
11912         QualType ParmTy;
11913         if (isa<CXXThisExpr>(E)) {
11914           Pos = ParamPositions[FD];
11915           ParmTy = E->getType();
11916         } else {
11917           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11918                                 ->getCanonicalDecl();
11919           Pos = ParamPositions[PVD];
11920           ParmTy = PVD->getType();
11921         }
11922         ParamAttrs[Pos].Alignment =
11923             (*NI)
11924                 ? (*NI)->EvaluateKnownConstInt(C)
11925                 : llvm::APSInt::getUnsigned(
11926                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11927                           .getQuantity());
11928         ++NI;
11929       }
11930       // Mark linear parameters.
11931       auto SI = Attr->steps_begin();
11932       auto MI = Attr->modifiers_begin();
11933       for (const Expr *E : Attr->linears()) {
11934         E = E->IgnoreParenImpCasts();
11935         unsigned Pos;
11936         // Rescaling factor needed to compute the linear parameter
11937         // value in the mangled name.
11938         unsigned PtrRescalingFactor = 1;
11939         if (isa<CXXThisExpr>(E)) {
11940           Pos = ParamPositions[FD];
11941         } else {
11942           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11943                                 ->getCanonicalDecl();
11944           Pos = ParamPositions[PVD];
11945           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11946             PtrRescalingFactor = CGM.getContext()
11947                                      .getTypeSizeInChars(P->getPointeeType())
11948                                      .getQuantity();
11949         }
11950         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11951         ParamAttr.Kind = Linear;
11952         // Assuming a stride of 1, for `linear` without modifiers.
11953         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11954         if (*SI) {
11955           Expr::EvalResult Result;
11956           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11957             if (const auto *DRE =
11958                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11959               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11960                 ParamAttr.Kind = LinearWithVarStride;
11961                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11962                     ParamPositions[StridePVD->getCanonicalDecl()]);
11963               }
11964             }
11965           } else {
11966             ParamAttr.StrideOrArg = Result.Val.getInt();
11967           }
11968         }
11969         // If we are using a linear clause on a pointer, we need to
11970         // rescale the value of linear_step with the byte size of the
11971         // pointee type.
11972         if (Linear == ParamAttr.Kind)
11973           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11974         ++SI;
11975         ++MI;
11976       }
11977       llvm::APSInt VLENVal;
11978       SourceLocation ExprLoc;
11979       const Expr *VLENExpr = Attr->getSimdlen();
11980       if (VLENExpr) {
11981         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11982         ExprLoc = VLENExpr->getExprLoc();
11983       }
11984       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11985       if (CGM.getTriple().isX86()) {
11986         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11987       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11988         unsigned VLEN = VLENVal.getExtValue();
11989         StringRef MangledName = Fn->getName();
11990         if (CGM.getTarget().hasFeature("sve"))
11991           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11992                                          MangledName, 's', 128, Fn, ExprLoc);
11993         if (CGM.getTarget().hasFeature("neon"))
11994           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11995                                          MangledName, 'n', 128, Fn, ExprLoc);
11996       }
11997     }
11998     FD = FD->getPreviousDecl();
11999   }
12000 }
12001 
12002 namespace {
12003 /// Cleanup action for doacross support.
12004 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12005 public:
12006   static const int DoacrossFinArgs = 2;
12007 
12008 private:
12009   llvm::FunctionCallee RTLFn;
12010   llvm::Value *Args[DoacrossFinArgs];
12011 
12012 public:
12013   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12014                     ArrayRef<llvm::Value *> CallArgs)
12015       : RTLFn(RTLFn) {
12016     assert(CallArgs.size() == DoacrossFinArgs);
12017     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12018   }
12019   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12020     if (!CGF.HaveInsertPoint())
12021       return;
12022     CGF.EmitRuntimeCall(RTLFn, Args);
12023   }
12024 };
12025 } // namespace
12026 
12027 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12028                                        const OMPLoopDirective &D,
12029                                        ArrayRef<Expr *> NumIterations) {
12030   if (!CGF.HaveInsertPoint())
12031     return;
12032 
12033   ASTContext &C = CGM.getContext();
12034   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
12035   RecordDecl *RD;
12036   if (KmpDimTy.isNull()) {
12037     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
12038     //  kmp_int64 lo; // lower
12039     //  kmp_int64 up; // upper
12040     //  kmp_int64 st; // stride
12041     // };
12042     RD = C.buildImplicitRecord("kmp_dim");
12043     RD->startDefinition();
12044     addFieldToRecordDecl(C, RD, Int64Ty);
12045     addFieldToRecordDecl(C, RD, Int64Ty);
12046     addFieldToRecordDecl(C, RD, Int64Ty);
12047     RD->completeDefinition();
12048     KmpDimTy = C.getRecordType(RD);
12049   } else {
12050     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
12051   }
12052   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12053   QualType ArrayTy =
12054       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
12055 
12056   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
12057   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
12058   enum { LowerFD = 0, UpperFD, StrideFD };
12059   // Fill dims with data.
12060   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
12061     LValue DimsLVal = CGF.MakeAddrLValue(
12062         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
12063     // dims.upper = num_iterations;
12064     LValue UpperLVal = CGF.EmitLValueForField(
12065         DimsLVal, *std::next(RD->field_begin(), UpperFD));
12066     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
12067         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
12068         Int64Ty, NumIterations[I]->getExprLoc());
12069     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
12070     // dims.stride = 1;
12071     LValue StrideLVal = CGF.EmitLValueForField(
12072         DimsLVal, *std::next(RD->field_begin(), StrideFD));
12073     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
12074                           StrideLVal);
12075   }
12076 
12077   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
12078   // kmp_int32 num_dims, struct kmp_dim * dims);
12079   llvm::Value *Args[] = {
12080       emitUpdateLocation(CGF, D.getBeginLoc()),
12081       getThreadID(CGF, D.getBeginLoc()),
12082       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
12083       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12084           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
12085           CGM.VoidPtrTy)};
12086 
12087   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12088       CGM.getModule(), OMPRTL___kmpc_doacross_init);
12089   CGF.EmitRuntimeCall(RTLFn, Args);
12090   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
12091       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
12092   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12093       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
12094   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
12095                                              llvm::makeArrayRef(FiniArgs));
12096 }
12097 
12098 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12099                                           const OMPDependClause *C) {
12100   QualType Int64Ty =
12101       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12102   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12103   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12104       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12105   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12106   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12107     const Expr *CounterVal = C->getLoopData(I);
12108     assert(CounterVal);
12109     llvm::Value *CntVal = CGF.EmitScalarConversion(
12110         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12111         CounterVal->getExprLoc());
12112     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12113                           /*Volatile=*/false, Int64Ty);
12114   }
12115   llvm::Value *Args[] = {
12116       emitUpdateLocation(CGF, C->getBeginLoc()),
12117       getThreadID(CGF, C->getBeginLoc()),
12118       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12119   llvm::FunctionCallee RTLFn;
12120   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12121     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12122                                                   OMPRTL___kmpc_doacross_post);
12123   } else {
12124     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12125     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12126                                                   OMPRTL___kmpc_doacross_wait);
12127   }
12128   CGF.EmitRuntimeCall(RTLFn, Args);
12129 }
12130 
12131 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12132                                llvm::FunctionCallee Callee,
12133                                ArrayRef<llvm::Value *> Args) const {
12134   assert(Loc.isValid() && "Outlined function call location must be valid.");
12135   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12136 
12137   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12138     if (Fn->doesNotThrow()) {
12139       CGF.EmitNounwindRuntimeCall(Fn, Args);
12140       return;
12141     }
12142   }
12143   CGF.EmitRuntimeCall(Callee, Args);
12144 }
12145 
/// Emits a call to the outlined function \p OutlinedFn with arguments
/// \p Args at location \p Loc.
///
/// This simply forwards to emitCall(), so \p Loc has to be valid (emitCall
/// asserts on it).
12146 void CGOpenMPRuntime::emitOutlinedFunctionCall(
12147     CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
12148     ArrayRef<llvm::Value *> Args) const {
12149   emitCall(CGF, Loc, OutlinedFn, Args);
12150 }
12151 
12152 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12153   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12154     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12155       HasEmittedDeclareTargetRegion = true;
12156 }
12157 
12158 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
12159                                              const VarDecl *NativeParam,
12160                                              const VarDecl *TargetParam) const {
12161   return CGF.GetAddrOfLocalVar(NativeParam);
12162 }
12163 
12164 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
12165                                                    const VarDecl *VD) {
12166   if (!VD)
12167     return Address::invalid();
12168   Address UntiedAddr = Address::invalid();
12169   Address UntiedRealAddr = Address::invalid();
12170   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12171   if (It != FunctionToUntiedTaskStackMap.end()) {
12172     const UntiedLocalVarsAddressesMap &UntiedData =
12173         UntiedLocalVarsStack[It->second];
12174     auto I = UntiedData.find(VD);
12175     if (I != UntiedData.end()) {
12176       UntiedAddr = I->second.first;
12177       UntiedRealAddr = I->second.second;
12178     }
12179   }
12180   const VarDecl *CVD = VD->getCanonicalDecl();
12181   if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
12182     // Use the default allocation.
12183     if (!isAllocatableDecl(VD))
12184       return UntiedAddr;
12185     llvm::Value *Size;
12186     CharUnits Align = CGM.getContext().getDeclAlign(CVD);
12187     if (CVD->getType()->isVariablyModifiedType()) {
12188       Size = CGF.getTypeSize(CVD->getType());
12189       // Align the size: ((size + align - 1) / align) * align
12190       Size = CGF.Builder.CreateNUWAdd(
12191           Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
12192       Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
12193       Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
12194     } else {
12195       CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
12196       Size = CGM.getSize(Sz.alignTo(Align));
12197     }
12198     llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
12199     const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
12200     assert(AA->getAllocator() &&
12201            "Expected allocator expression for non-default allocator.");
12202     llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
12203     // According to the standard, the original allocator type is a enum
12204     // (integer). Convert to pointer type, if required.
12205     Allocator = CGF.EmitScalarConversion(
12206         Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
12207         AA->getAllocator()->getExprLoc());
12208     llvm::Value *Args[] = {ThreadID, Size, Allocator};
12209 
12210     llvm::Value *Addr =
12211         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
12212                                 CGM.getModule(), OMPRTL___kmpc_alloc),
12213                             Args, getName({CVD->getName(), ".void.addr"}));
12214     llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12215         CGM.getModule(), OMPRTL___kmpc_free);
12216     QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12217     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12218         Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12219     if (UntiedAddr.isValid())
12220       CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12221 
12222     // Cleanup action for allocate support.
12223     class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12224       llvm::FunctionCallee RTLFn;
12225       SourceLocation::UIntTy LocEncoding;
12226       Address Addr;
12227       const Expr *Allocator;
12228 
12229     public:
12230       OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12231                            SourceLocation::UIntTy LocEncoding, Address Addr,
12232                            const Expr *Allocator)
12233           : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12234             Allocator(Allocator) {}
12235       void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12236         if (!CGF.HaveInsertPoint())
12237           return;
12238         llvm::Value *Args[3];
12239         Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12240             CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12241         Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12242             Addr.getPointer(), CGF.VoidPtrTy);
12243         llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
12244         // According to the standard, the original allocator type is a enum
12245         // (integer). Convert to pointer type, if required.
12246         AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12247                                             CGF.getContext().VoidPtrTy,
12248                                             Allocator->getExprLoc());
12249         Args[2] = AllocVal;
12250 
12251         CGF.EmitRuntimeCall(RTLFn, Args);
12252       }
12253     };
12254     Address VDAddr =
12255         UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
12256     CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12257         NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12258         VDAddr, AA->getAllocator());
12259     if (UntiedRealAddr.isValid())
12260       if (auto *Region =
12261               dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12262         Region->emitUntiedSwitch(CGF);
12263     return VDAddr;
12264   }
12265   return UntiedAddr;
12266 }
12267 
12268 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12269                                              const VarDecl *VD) const {
12270   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12271   if (It == FunctionToUntiedTaskStackMap.end())
12272     return false;
12273   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12274 }
12275 
12276 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12277     CodeGenModule &CGM, const OMPLoopDirective &S)
12278     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12279   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12280   if (!NeedToPush)
12281     return;
12282   NontemporalDeclsSet &DS =
12283       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12284   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12285     for (const Stmt *Ref : C->private_refs()) {
12286       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12287       const ValueDecl *VD;
12288       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12289         VD = DRE->getDecl();
12290       } else {
12291         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12292         assert((ME->isImplicitCXXThis() ||
12293                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12294                "Expected member of current class.");
12295         VD = ME->getMemberDecl();
12296       }
12297       DS.insert(VD);
12298     }
12299   }
12300 }
12301 
12302 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12303   if (!NeedToPush)
12304     return;
12305   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12306 }
12307 
12308 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12309     CodeGenFunction &CGF,
12310     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12311                           std::pair<Address, Address>> &LocalVars)
12312     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12313   if (!NeedToPush)
12314     return;
12315   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12316       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12317   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12318 }
12319 
12320 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12321   if (!NeedToPush)
12322     return;
12323   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12324 }
12325 
12326 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12327   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12328 
12329   return llvm::any_of(
12330       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12331       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
12332 }
12333 
12334 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12335     const OMPExecutableDirective &S,
12336     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12337     const {
12338   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12339   // Vars in target/task regions must be excluded completely.
12340   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12341       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12342     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12343     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12344     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12345     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12346       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12347         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12348     }
12349   }
12350   // Exclude vars in private clauses.
12351   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12352     for (const Expr *Ref : C->varlists()) {
12353       if (!Ref->getType()->isScalarType())
12354         continue;
12355       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12356       if (!DRE)
12357         continue;
12358       NeedToCheckForLPCs.insert(DRE->getDecl());
12359     }
12360   }
12361   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12362     for (const Expr *Ref : C->varlists()) {
12363       if (!Ref->getType()->isScalarType())
12364         continue;
12365       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12366       if (!DRE)
12367         continue;
12368       NeedToCheckForLPCs.insert(DRE->getDecl());
12369     }
12370   }
12371   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12372     for (const Expr *Ref : C->varlists()) {
12373       if (!Ref->getType()->isScalarType())
12374         continue;
12375       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12376       if (!DRE)
12377         continue;
12378       NeedToCheckForLPCs.insert(DRE->getDecl());
12379     }
12380   }
12381   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12382     for (const Expr *Ref : C->varlists()) {
12383       if (!Ref->getType()->isScalarType())
12384         continue;
12385       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12386       if (!DRE)
12387         continue;
12388       NeedToCheckForLPCs.insert(DRE->getDecl());
12389     }
12390   }
12391   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12392     for (const Expr *Ref : C->varlists()) {
12393       if (!Ref->getType()->isScalarType())
12394         continue;
12395       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12396       if (!DRE)
12397         continue;
12398       NeedToCheckForLPCs.insert(DRE->getDecl());
12399     }
12400   }
12401   for (const Decl *VD : NeedToCheckForLPCs) {
12402     for (const LastprivateConditionalData &Data :
12403          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12404       if (Data.DeclToUniqueName.count(VD) > 0) {
12405         if (!Data.Disabled)
12406           NeedToAddForLPCsAsDisabled.insert(VD);
12407         break;
12408       }
12409     }
12410   }
12411 }
12412 
12413 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12414     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12415     : CGM(CGF.CGM),
12416       Action((CGM.getLangOpts().OpenMP >= 50 &&
12417               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12418                            [](const OMPLastprivateClause *C) {
12419                              return C->getKind() ==
12420                                     OMPC_LASTPRIVATE_conditional;
12421                            }))
12422                  ? ActionToDo::PushAsLastprivateConditional
12423                  : ActionToDo::DoNotPush) {
12424   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12425   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12426     return;
12427   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12428          "Expected a push action.");
12429   LastprivateConditionalData &Data =
12430       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12431   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12432     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12433       continue;
12434 
12435     for (const Expr *Ref : C->varlists()) {
12436       Data.DeclToUniqueName.insert(std::make_pair(
12437           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12438           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12439     }
12440   }
12441   Data.IVLVal = IVLVal;
12442   Data.Fn = CGF.CurFn;
12443 }
12444 
12445 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12446     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12447     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12448   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12449   if (CGM.getLangOpts().OpenMP < 50)
12450     return;
12451   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12452   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12453   if (!NeedToAddForLPCsAsDisabled.empty()) {
12454     Action = ActionToDo::DisableLastprivateConditional;
12455     LastprivateConditionalData &Data =
12456         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12457     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12458       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12459     Data.Fn = CGF.CurFn;
12460     Data.Disabled = true;
12461   }
12462 }
12463 
/// Returns a LastprivateConditionalRAII built with the two-argument
/// (CodeGenFunction, directive) constructor for directive \p S.
// The object is returned as a prvalue directly from the constructor call;
// keep it that way so no intermediate named object is introduced.
12464 CGOpenMPRuntime::LastprivateConditionalRAII
12465 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12466     CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12467   return LastprivateConditionalRAII(CGF, S);
12468 }
12469 
12470 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12471   if (CGM.getLangOpts().OpenMP < 50)
12472     return;
12473   if (Action == ActionToDo::DisableLastprivateConditional) {
12474     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12475            "Expected list of disabled private vars.");
12476     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12477   }
12478   if (Action == ActionToDo::PushAsLastprivateConditional) {
12479     assert(
12480         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12481         "Expected list of lastprivate conditional vars.");
12482     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12483   }
12484 }
12485 
12486 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12487                                                         const VarDecl *VD) {
12488   ASTContext &C = CGM.getContext();
12489   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12490   if (I == LastprivateConditionalToTypes.end())
12491     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12492   QualType NewType;
12493   const FieldDecl *VDField;
12494   const FieldDecl *FiredField;
12495   LValue BaseLVal;
12496   auto VI = I->getSecond().find(VD);
12497   if (VI == I->getSecond().end()) {
12498     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12499     RD->startDefinition();
12500     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12501     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12502     RD->completeDefinition();
12503     NewType = C.getRecordType(RD);
12504     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12505     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12506     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12507   } else {
12508     NewType = std::get<0>(VI->getSecond());
12509     VDField = std::get<1>(VI->getSecond());
12510     FiredField = std::get<2>(VI->getSecond());
12511     BaseLVal = std::get<3>(VI->getSecond());
12512   }
12513   LValue FiredLVal =
12514       CGF.EmitLValueForField(BaseLVal, FiredField);
12515   CGF.EmitStoreOfScalar(
12516       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12517       FiredLVal);
12518   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12519 }
12520 
12521 namespace {
12522 /// Checks if the lastprivate conditional variable is referenced in LHS.
12523 class LastprivateConditionalRefChecker final
12524     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12525   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12526   const Expr *FoundE = nullptr;
12527   const Decl *FoundD = nullptr;
12528   StringRef UniqueDeclName;
12529   LValue IVLVal;
12530   llvm::Function *FoundFn = nullptr;
12531   SourceLocation Loc;
12532 
12533 public:
12534   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12535     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12536          llvm::reverse(LPM)) {
12537       auto It = D.DeclToUniqueName.find(E->getDecl());
12538       if (It == D.DeclToUniqueName.end())
12539         continue;
12540       if (D.Disabled)
12541         return false;
12542       FoundE = E;
12543       FoundD = E->getDecl()->getCanonicalDecl();
12544       UniqueDeclName = It->second;
12545       IVLVal = D.IVLVal;
12546       FoundFn = D.Fn;
12547       break;
12548     }
12549     return FoundE == E;
12550   }
12551   bool VisitMemberExpr(const MemberExpr *E) {
12552     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12553       return false;
12554     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12555          llvm::reverse(LPM)) {
12556       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12557       if (It == D.DeclToUniqueName.end())
12558         continue;
12559       if (D.Disabled)
12560         return false;
12561       FoundE = E;
12562       FoundD = E->getMemberDecl()->getCanonicalDecl();
12563       UniqueDeclName = It->second;
12564       IVLVal = D.IVLVal;
12565       FoundFn = D.Fn;
12566       break;
12567     }
12568     return FoundE == E;
12569   }
12570   bool VisitStmt(const Stmt *S) {
12571     for (const Stmt *Child : S->children()) {
12572       if (!Child)
12573         continue;
12574       if (const auto *E = dyn_cast<Expr>(Child))
12575         if (!E->isGLValue())
12576           continue;
12577       if (Visit(Child))
12578         return true;
12579     }
12580     return false;
12581   }
12582   explicit LastprivateConditionalRefChecker(
12583       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12584       : LPM(LPM) {}
12585   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12586   getFoundData() const {
12587     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12588   }
12589 };
12590 } // namespace
12591 
12592 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12593                                                        LValue IVLVal,
12594                                                        StringRef UniqueDeclName,
12595                                                        LValue LVal,
12596                                                        SourceLocation Loc) {
12597   // Last updated loop counter for the lastprivate conditional var.
12598   // int<xx> last_iv = 0;
12599   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12600   llvm::Constant *LastIV =
12601       getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
12602   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12603       IVLVal.getAlignment().getAsAlign());
12604   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12605 
12606   // Last value of the lastprivate conditional.
12607   // decltype(priv_a) last_a;
12608   llvm::Constant *Last = getOrCreateInternalVariable(
12609       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12610   cast<llvm::GlobalVariable>(Last)->setAlignment(
12611       LVal.getAlignment().getAsAlign());
12612   LValue LastLVal =
12613       CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());
12614 
12615   // Global loop counter. Required to handle inner parallel-for regions.
12616   // iv
12617   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12618 
12619   // #pragma omp critical(a)
12620   // if (last_iv <= iv) {
12621   //   last_iv = iv;
12622   //   last_a = priv_a;
12623   // }
12624   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12625                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12626     Action.Enter(CGF);
12627     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12628     // (last_iv <= iv) ? Check if the variable is updated and store new
12629     // value in global var.
12630     llvm::Value *CmpRes;
12631     if (IVLVal.getType()->isSignedIntegerType()) {
12632       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12633     } else {
12634       assert(IVLVal.getType()->isUnsignedIntegerType() &&
12635              "Loop iteration variable must be integer.");
12636       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12637     }
12638     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12639     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12640     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12641     // {
12642     CGF.EmitBlock(ThenBB);
12643 
12644     //   last_iv = iv;
12645     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12646 
12647     //   last_a = priv_a;
12648     switch (CGF.getEvaluationKind(LVal.getType())) {
12649     case TEK_Scalar: {
12650       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12651       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12652       break;
12653     }
12654     case TEK_Complex: {
12655       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12656       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12657       break;
12658     }
12659     case TEK_Aggregate:
12660       llvm_unreachable(
12661           "Aggregates are not supported in lastprivate conditional.");
12662     }
12663     // }
12664     CGF.EmitBranch(ExitBB);
12665     // There is no need to emit line number for unconditional branch.
12666     (void)ApplyDebugLocation::CreateEmpty(CGF);
12667     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12668   };
12669 
12670   if (CGM.getLangOpts().OpenMPSimd) {
12671     // Do not emit as a critical region as no parallel region could be emitted.
12672     RegionCodeGenTy ThenRCG(CodeGen);
12673     ThenRCG(CGF);
12674   } else {
12675     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12676   }
12677 }
12678 
12679 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12680                                                          const Expr *LHS) {
12681   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12682     return;
12683   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12684   if (!Checker.Visit(LHS))
12685     return;
12686   const Expr *FoundE;
12687   const Decl *FoundD;
12688   StringRef UniqueDeclName;
12689   LValue IVLVal;
12690   llvm::Function *FoundFn;
12691   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12692       Checker.getFoundData();
12693   if (FoundFn != CGF.CurFn) {
12694     // Special codegen for inner parallel regions.
12695     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12696     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12697     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12698            "Lastprivate conditional is not found in outer region.");
12699     QualType StructTy = std::get<0>(It->getSecond());
12700     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12701     LValue PrivLVal = CGF.EmitLValue(FoundE);
12702     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12703         PrivLVal.getAddress(CGF),
12704         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12705     LValue BaseLVal =
12706         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12707     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12708     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12709                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12710                         FiredLVal, llvm::AtomicOrdering::Unordered,
12711                         /*IsVolatile=*/true, /*isInit=*/false);
12712     return;
12713   }
12714 
12715   // Private address of the lastprivate conditional in the current context.
12716   // priv_a
12717   LValue LVal = CGF.EmitLValue(FoundE);
12718   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12719                                    FoundE->getExprLoc());
12720 }
12721 
12722 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12723     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12724     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12725   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12726     return;
12727   auto Range = llvm::reverse(LastprivateConditionalStack);
12728   auto It = llvm::find_if(
12729       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12730   if (It == Range.end() || It->Fn != CGF.CurFn)
12731     return;
12732   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12733   assert(LPCI != LastprivateConditionalToTypes.end() &&
12734          "Lastprivates must be registered already.");
12735   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12736   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12737   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12738   for (const auto &Pair : It->DeclToUniqueName) {
12739     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12740     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12741       continue;
12742     auto I = LPCI->getSecond().find(Pair.first);
12743     assert(I != LPCI->getSecond().end() &&
12744            "Lastprivate must be rehistered already.");
12745     // bool Cmp = priv_a.Fired != 0;
12746     LValue BaseLVal = std::get<3>(I->getSecond());
12747     LValue FiredLVal =
12748         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12749     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12750     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12751     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12752     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12753     // if (Cmp) {
12754     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12755     CGF.EmitBlock(ThenBB);
12756     Address Addr = CGF.GetAddrOfLocalVar(VD);
12757     LValue LVal;
12758     if (VD->getType()->isReferenceType())
12759       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12760                                            AlignmentSource::Decl);
12761     else
12762       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12763                                 AlignmentSource::Decl);
12764     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12765                                      D.getBeginLoc());
12766     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12767     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12768     // }
12769   }
12770 }
12771 
12772 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12773     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12774     SourceLocation Loc) {
12775   if (CGF.getLangOpts().OpenMP < 50)
12776     return;
12777   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12778   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12779          "Unknown lastprivate conditional variable.");
12780   StringRef UniqueName = It->second;
12781   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12782   // The variable was not updated in the region - exit.
12783   if (!GV)
12784     return;
12785   LValue LPLVal = CGF.MakeAddrLValue(
12786       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12787   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12788   CGF.EmitStoreOfScalar(Res, PrivLVal);
12789 }
12790 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so no
/// parameter is read and no function is ever returned.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12796 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so no
/// parameter is read and no function is ever returned.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12802 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so no
/// parameter is read, \p NumberOfParts is never written, and no function is
/// ever returned.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12810 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// none of the parameters is used.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12818 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// \p CriticalOpGen is never invoked.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12825 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// \p MasterOpGen is never invoked.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12831 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable;
/// \p MasterOpGen is never invoked and \p Filter is never evaluated.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12838 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// none of the parameters is used.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12843 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// \p TaskgroupOpGen is never invoked.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12849 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable;
/// \p SingleOpGen is never invoked and the expression lists are not read.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12857 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// \p OrderedOpGen is never invoked.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12864 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// none of the parameters is used.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12872 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// none of the parameters is used.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12879 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// none of the parameters is used.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12885 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// none of the parameters is used.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12891 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// none of the parameters is used.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12898 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// none of the parameters is used.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12904 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so no
/// parameter is read and no value is ever returned.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12912 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// none of the parameters is used.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12918 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// none of the parameters is used.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12924 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so no
/// parameter is read and no address is ever returned.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12931 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so no
/// parameter is read and no function is ever returned.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12937 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so no
/// parameter is read and no address is ever returned.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12942 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// none of the parameters is used.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12949 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// none of the parameters is used.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12958 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// none of the parameters is used.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12965 
/// Asserts that \p Options requests a simple reduction and then forwards every
/// argument unchanged to CGOpenMPRuntime::emitReduction.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  // Only Options.SimpleReduction is checked here; everything else is left to
  // the CGOpenMPRuntime implementation.
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12974 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so no
/// parameter is read and no value is ever returned.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12980 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// none of the parameters is used.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12986 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// \p RCG is never touched.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12993 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so no
/// parameter is read and no address is ever returned.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13000 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// none of the parameters is used.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13005 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// none of the parameters is used.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13011 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// \p IfCond is never evaluated.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13017 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so
/// the reference parameters \p OutlinedFn and \p OutlinedFnID are never
/// assigned.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13024 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// the \p SizeEmitter callback is never invoked.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13034 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so
/// \p GD is not inspected and no value is ever returned.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13038 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so
/// \p GD is not inspected and no value is ever returned.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13042 
/// Unconditionally returns false without inspecting \p GD. Unlike most other
/// CGOpenMPSIMDRuntime members defined nearby, this one is callable and does
/// not hit llvm_unreachable.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
13046 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// none of the parameters is used.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13054 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable;
/// \p NumTeams and \p ThreadLimit are never evaluated.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13061 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable;
/// \p CodeGen is never invoked and \p Info is never modified.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13067 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// none of the parameters is used.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13073 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// none of the parameters is used.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13079 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable and
/// none of the parameters is used.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13084 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so no
/// parameter is read and no declaration is ever returned.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13090 
/// Not supported in SIMD-only mode: the body is a bare llvm_unreachable, so no
/// parameter is read and no address is ever returned.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13097