1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/APValue.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/StmtOpenMP.h"
23 #include "clang/AST/StmtVisitor.h"
24 #include "clang/Basic/BitmaskEnum.h"
25 #include "clang/Basic/FileManager.h"
26 #include "clang/Basic/OpenMPKinds.h"
27 #include "clang/Basic/SourceManager.h"
28 #include "clang/CodeGen/ConstantInitBuilder.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/SetOperations.h"
31 #include "llvm/ADT/StringExtras.h"
32 #include "llvm/Bitcode/BitcodeReader.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info for a construct with an explicit captured statement.
  /// \param CS Captured statement of the construct.
  /// \param RegionKind Kind of the region (outlined/task/inlined/target).
  /// \param CodeGen Callback that emits the body of the region.
  /// \param Kind OpenMP directive that created this region.
  /// \param HasCancel true if the region may contain a 'cancel' directive.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info without a captured statement (used by inlined
  /// regions, which reuse the enclosing region's captures).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for untied tasks; no-op by default,
  /// overridden by task regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: every CR_OpenMP captured-stmt info is one of us.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of this OpenMP region.
  CGOpenMPRegionKind RegionKind;
  /// Callback used to emit the body of the region.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive that created the region.
  OpenMPDirectiveKind Kind;
  /// true if the region may contain a 'cancel' directive.
  bool HasCancel;
};
109 
/// API for captured statement code generation in OpenMP constructs.
/// Used for standalone 'parallel' directives that are emitted as a
/// separate outlined function.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Captured variable holding the global thread id;
  /// must not be null (asserted below).
  /// \param HelperName Name to use for the generated outlined helper.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the generated outlined helper function.
  StringRef HelperName;
};
142 
/// API for captured statement code generation in OpenMP constructs.
/// Used for standalone 'task' directives emitted as outlined functions.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action implementing the switch-based resume mechanism for
  /// untied tasks: the outlined function starts with a switch on the task
  /// part id, and every scheduling point registers a new case so that a
  /// re-invocation of the task resumes right after the point where it
  /// previously yielded.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// true if the task is untied (note the constructor takes 'Tied').
    bool Untied;
    /// Captured variable holding the task's current part id.
    const VarDecl *PartIDVar;
    /// Codegen sequence to run at each task yield point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Dispatch switch at function entry; one case per task part.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Unrecognized part ids fall through to the 'done' block, which
        // simply returns from the outlined function.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes at the very beginning of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task scheduling point: store the next part id into the
    /// part-id variable, run the yield codegen, return from the function,
    /// and register a new switch case targeting the block right after this
    /// point so the task can be resumed there.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // Record where to resume: the next unused case number.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Forward scheduling-point emission to the untied-task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
231 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are forwarded to the enclosing (outer) OpenMP
/// region info, when one exists.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI Captured-stmt info of the enclosing region; may be null
  /// or a non-OpenMP info, in which case most queries have no target.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Forward the context value to the outer region; inlined regions have
  /// no context parameter of their own.
  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region,no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  /// Note: unlike the other forwarders, this intentionally consults the raw
  /// OldCSI so that a non-OpenMP enclosing captured region can also answer.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
314 
/// API for captured statement code generation in OpenMP target
/// constructs. For this captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param HelperName Client-provided, application-unique name for the
  /// generated target region function.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique, client-provided name of the target region.
  StringRef HelperName;
};
343 
/// Placeholder region-codegen callback for expression captures, which never
/// emit a statement body; reaching this is a bug.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  /// Installs itself over \p CGF's current captured-stmt info and privatizes
  /// every global variable captured by \p CS so references to them resolve
  /// to region-local addresses.
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already function-local; only globals need
      // the privatization treatment.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  /// Never participates in RTTI dispatch: instances are stack-scoped.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
406 
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture map, restored on destruction (NoInheritance only).
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved lambda 'this' capture field (NoInheritance only).
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block capture info (NoInheritance only).
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  /// If true, lambda/block capture state is hidden from the inlined region
  /// for the lifetime of this RAII object.
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Stash the function's lambda/block capture state away so the inlined
      // region does not see it.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      // Put the saved lambda/block capture state back.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
449 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
478 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Special device ids reserved by the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
504 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
545 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule when none is specified: static.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
577 
578 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
579 /// region.
580 class CleanupTy final : public EHScopeStack::Cleanup {
581   PrePostActionTy *Action;
582 
583 public:
584   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
585   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
586     if (!CGF.HaveInsertPoint())
587       return;
588     Action->Exit(CGF);
589   }
590 };
591 
592 } // anonymous namespace
593 
/// Invoke the stored codegen callback inside its own cleanup scope, wiring
/// the optional pre/post action's Exit hook into the EH cleanup stack so it
/// also runs on exceptional exits.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    // Push the Exit hook before running the callback so it fires on both
    // normal and EH paths out of the region.
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    // No caller-provided action: hand the callback a default no-op action.
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
604 
605 /// Check if the combiner is a call to UDR combiner and if it is so return the
606 /// UDR decl used for reduction.
607 static const OMPDeclareReductionDecl *
608 getReductionInit(const Expr *ReductionOp) {
609   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
610     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
611       if (const auto *DRE =
612               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
613         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
614           return DRD;
615   return nullptr;
616 }
617 
/// Emit initialization of a reduction private copy, using the initializer of
/// a user-defined reduction (UDR) when one is present and the null value of
/// the reduction type otherwise.
/// \param DRD Declare-reduction declaration for this reduction item.
/// \param InitOp Call expression invoking the UDR initializer.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original (shared) variable.
/// \param Ty Type of the reduction item.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // UDR with an explicit initializer: bind the initializer call's first
    // argument to the private copy and its second to the original variable,
    // then emit the call with the initializer function as the callee.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // The initializer function is the second element of the reduction pair.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer clause: default-initialize from a private constant
    // global holding the null value of the reduction type.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied straight into the private storage.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    // Scalar/complex: store the loaded null value into the private copy.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
673 
/// Emit initialization of arrays of complex types.
/// \param CGF Current codegen function.
/// \param DestAddr Address of the array to initialize.
/// \param Type Type of the array.
/// \param EmitDeclareReductionInit true if elements must be initialized with
/// a user-defined (declare reduction) initializer.
/// \param Init Initialization expression applied to each element.
/// \param DRD Declare-reduction declaration, if any; when present, elements
/// of the original array at \p SrcAddr are made available to the initializer.
/// \param SrcAddr Address of the original array (used only with \p DRD).
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes track the current source/destination element pointers across
  // loop iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Per-element cleanups must run before advancing to the next element.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name says 'dest' but this advances the
    // *source* pointer; cosmetic only, the IR name has no semantic effect.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
765 
766 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
767   return CGF.EmitOMPSharedLValue(E);
768 }
769 
770 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
771                                             const Expr *E) {
772   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
773     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
774   return LValue();
775 }
776 
777 void ReductionCodeGen::emitAggregateInitialization(
778     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
779     const OMPDeclareReductionDecl *DRD) {
780   // Emit VarDecl with copy init for arrays.
781   // Get the address of the original variable captured in current
782   // captured region.
783   const auto *PrivateVD =
784       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
785   bool EmitDeclareReductionInit =
786       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
787   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
788                        EmitDeclareReductionInit,
789                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
790                                                 : PrivateVD->getInit(),
791                        DRD, SharedLVal.getAddress(CGF));
792 }
793 
794 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
795                                    ArrayRef<const Expr *> Origs,
796                                    ArrayRef<const Expr *> Privates,
797                                    ArrayRef<const Expr *> ReductionOps) {
798   ClausesData.reserve(Shareds.size());
799   SharedAddresses.reserve(Shareds.size());
800   Sizes.reserve(Shareds.size());
801   BaseDecls.reserve(Shareds.size());
802   const auto *IOrig = Origs.begin();
803   const auto *IPriv = Privates.begin();
804   const auto *IRed = ReductionOps.begin();
805   for (const Expr *Ref : Shareds) {
806     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
807     std::advance(IOrig, 1);
808     std::advance(IPriv, 1);
809     std::advance(IRed, 1);
810   }
811 }
812 
813 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
814   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
815          "Number of generated lvalues must be exactly N.");
816   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
817   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
818   SharedAddresses.emplace_back(First, Second);
819   if (ClausesData[N].Shared == ClausesData[N].Ref) {
820     OrigAddresses.emplace_back(First, Second);
821   } else {
822     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
823     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
824     OrigAddresses.emplace_back(First, Second);
825   }
826 }
827 
/// Compute and record (in Sizes[N]) the size of reduction item N. For
/// constant-sized types only the byte size is stored; for variably modified
/// (VLA) types both the byte size and the element count are computed at
/// runtime, and the VLA type is emitted with that element count bound to its
/// size expression.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Statically-sized item: no runtime element count is needed.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1; byte size = count * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole item: byte size comes from the type; derive the element count by
    // dividing by the element size.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the private VLA type's size expression to the computed element count
  // while emitting the variably-modified type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
864 
/// Emit the VLA type of reduction item N using the externally provided
/// element count \p Size. No-op (with a consistency assertion) for
/// constant-sized types.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized items must not carry a runtime size.
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to Size while emitting the type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
883 
/// Emit the initialization of the private copy for reduction item N.
/// Dispatches between (a) element-wise aggregate initialization for array
/// types, (b) the user-defined reduction initializer, and (c) the private
/// variable's own (non-trivial) initializer, after casting both private and
/// shared addresses to their proper memory types.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Cast the private address to the private copy's memory type.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  // Rebuild the shared lvalue with the shared type's memory representation,
  // preserving base info and TBAA from the originally emitted lvalue.
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array item: per-element initialization (DefaultInit runs first when a
    // declare-reduction initializer will be used).
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar item with a user-defined reduction initializer.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private copy's own non-trivial initializer when
    // DefaultInit did not handle it.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
917 
918 bool ReductionCodeGen::needCleanups(unsigned N) {
919   const auto *PrivateVD =
920       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
921   QualType PrivateType = PrivateVD->getType();
922   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
923   return DTorKind != QualType::DK_none;
924 }
925 
926 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
927                                     Address PrivateAddr) {
928   const auto *PrivateVD =
929       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
930   QualType PrivateType = PrivateVD->getType();
931   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
932   if (needCleanups(N)) {
933     PrivateAddr = CGF.Builder.CreateElementBitCast(
934         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
935     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
936   }
937 }
938 
/// Starting from \p BaseLV of type \p BaseTy, load through each pointer or
/// reference level until the pointee type matches \p ElTy, then return an
/// lvalue for the resulting address cast to ElTy's memory type.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Peel one indirection level per iteration.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Final lvalue keeps the (possibly still wrapped) type plus the original
  // base/TBAA info, but with the address cast to ElTy's memory type.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
958 
/// Rebuild the pointer/reference structure of \p BaseTy around the raw
/// address \p Addr: one temporary is allocated per indirection level down to
/// \p ElTy, each outer temporary storing the address of the next inner one,
/// and \p Addr (cast to the innermost element type) stored in the innermost.
/// Returns the outermost temporary, or \p Addr itself (with alignment
/// \p BaseLVAlignment) when no indirection levels were needed.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();       // innermost temporary so far
  Address TopTmp = Address::invalid();    // previous (outer) temporary
  Address MostTopTmp = Address::invalid(); // outermost temporary (returned)
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      // Link the previous level to this new, deeper one.
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast Addr to the innermost temporary's element type (or to BaseLVType
  // when no temporaries were created).
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
986 
987 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
988   const VarDecl *OrigVD = nullptr;
989   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
990     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
991     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
992       Base = TempOASE->getBase()->IgnoreParenImpCasts();
993     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
994       Base = TempASE->getBase()->IgnoreParenImpCasts();
995     DE = cast<DeclRefExpr>(Base);
996     OrigVD = cast<VarDecl>(DE->getDecl());
997   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
998     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
999     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1000       Base = TempASE->getBase()->IgnoreParenImpCasts();
1001     DE = cast<DeclRefExpr>(Base);
1002     OrigVD = cast<VarDecl>(DE->getDecl());
1003   }
1004   return OrigVD;
1005 }
1006 
/// Adjust the private copy's address for array-section/subscript reduction
/// items: the private storage corresponds to the section, not the whole base
/// array, so the returned address is offset by the distance between the base
/// variable and the shared section start (and re-wrapped in the base's
/// pointer structure via castToBase). For plain items the address is
/// returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    // Address of the base variable, dereferenced down to the element type.
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Offset (in elements) from the section start back to the base.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    // Apply the same offset to the private storage.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1033 
1034 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1035   const OMPDeclareReductionDecl *DRD =
1036       getReductionInit(ClausesData[N].ReductionOp);
1037   return DRD && DRD->getInitializer();
1038 }
1039 
1040 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1041   return CGF.EmitLoadOfPointerLValue(
1042       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1043       getThreadIDVariable()->getType()->castAs<PointerType>());
1044 }
1045 
/// Emit the body of an OpenMP region inside a terminate scope so that an
/// exception cannot propagate out of the structured block.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1060 
1061 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1062     CodeGenFunction &CGF) {
1063   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1064                             getThreadIDVariable()->getType(),
1065                             AlignmentSource::Decl);
1066 }
1067 
1068 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1069                                        QualType FieldTy) {
1070   auto *Field = FieldDecl::Create(
1071       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1072       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1073       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1074   Field->setAccess(AS_public);
1075   DC->addDecl(Field);
1076   return Field;
1077 }
1078 
/// Construct the OpenMP runtime codegen helper. \p FirstSeparator and
/// \p Separator are used by getName() when mangling runtime entity names.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // Critical-section names are modeled as an array of 8 i32 values.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1089 
1090 void CGOpenMPRuntime::clear() {
1091   InternalVars.clear();
1092   // Clean non-target variable declarations possibly used only in debug info.
1093   for (const auto &Data : EmittedNonTargetVariables) {
1094     if (!Data.getValue().pointsToAliveValue())
1095       continue;
1096     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1097     if (!GV)
1098       continue;
1099     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1100       continue;
1101     GV->eraseFromParent();
1102   }
1103 }
1104 
1105 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1106   SmallString<128> Buffer;
1107   llvm::raw_svector_ostream OS(Buffer);
1108   StringRef Sep = FirstSeparator;
1109   for (StringRef Part : Parts) {
1110     OS << Sep << Part;
1111     Sep = Separator;
1112   }
1113   return std::string(OS.str());
1114 }
1115 
/// Emit the internal helper function for a declare-reduction combiner or
/// initializer. The generated function has signature
/// 'void .omp_combiner.(Ty *omp_out, Ty *omp_in)' (note: the "out" parameter
/// comes first, matching the order Args is populated below), with \p In and
/// \p Out privatized to the dereferenced parameters before the expression
/// \p CombinerInitializer is emitted.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *omp_out, Ty *omp_in);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // The helper is tiny glue code; force-inline it in optimized builds.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers without a call-style expression, emit the 'omp_priv'
  // variable's own non-trivial initializer into the out parameter.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1172 
/// Emit (at most once) the combiner and optional initializer helpers for the
/// user-defined reduction \p D and cache them in UDRMap. When \p CGF is
/// non-null, the declaration is also recorded against the current function in
/// FunctionUDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Already emitted for this declaration.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only call-style initializers pass the expression; otherwise the
    // 'omp_priv' variable's own initializer is emitted by the helper.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1198 
1199 std::pair<llvm::Function *, llvm::Function *>
1200 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1201   auto I = UDRMap.find(D);
1202   if (I != UDRMap.end())
1203     return I->second;
1204   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1205   return UDRMap.lookup(D);
1206 }
1207 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  /// Push a finalization callback for \p Kind onto the OpenMPIRBuilder's
  /// finalization stack; the destructor pops it again. A null \p OMPBuilder
  /// makes both operations no-ops.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Temporarily move the builder to IP and branch to the cancellation
      // destination through any active cleanups.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1252 
1253 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1254     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1255     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1256     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1257   assert(ThreadIDVar->getType()->isPointerType() &&
1258          "thread id variable must be of type kmp_int32 *");
1259   CodeGenFunction CGF(CGM, true);
1260   bool HasCancel = false;
1261   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1262     HasCancel = OPD->hasCancel();
1263   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1264     HasCancel = OPD->hasCancel();
1265   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1266     HasCancel = OPSD->hasCancel();
1267   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1268     HasCancel = OPFD->hasCancel();
1269   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1270     HasCancel = OPFD->hasCancel();
1271   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1272     HasCancel = OPFD->hasCancel();
1273   else if (const auto *OPFD =
1274                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1275     HasCancel = OPFD->hasCancel();
1276   else if (const auto *OPFD =
1277                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1278     HasCancel = OPFD->hasCancel();
1279 
1280   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1281   //       parallel region to make cancellation barriers work properly.
1282   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1283   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1284   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1285                                     HasCancel, OutlinedHelperName);
1286   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1287   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1288 }
1289 
1290 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1291     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1292     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1293   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1294   return emitParallelOrTeamsOutlinedFunction(
1295       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1296 }
1297 
1298 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1299     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1300     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1301   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1302   return emitParallelOrTeamsOutlinedFunction(
1303       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1304 }
1305 
/// Outline the body of a task/taskloop directive. For untied tasks, an
/// action is attached that re-enqueues the task via __kmpc_omp_task at task
/// part boundaries, and \p NumberOfParts is set to the number of generated
/// parts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Emits '__kmpc_omp_task(loc, tid, task_t)' to re-schedule the remainder
  // of an untied task.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Propagate cancellability from every directive form that supports it.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The number of parts is only meaningful for untied tasks, which may be
  // resumed part-by-part.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1352 
1353 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1354                              const RecordDecl *RD, const CGRecordLayout &RL,
1355                              ArrayRef<llvm::Constant *> Data) {
1356   llvm::StructType *StructTy = RL.getLLVMType();
1357   unsigned PrevIdx = 0;
1358   ConstantInitBuilder CIBuilder(CGM);
1359   auto DI = Data.begin();
1360   for (const FieldDecl *FD : RD->fields()) {
1361     unsigned Idx = RL.getLLVMFieldNo(FD);
1362     // Fill the alignment.
1363     for (unsigned I = PrevIdx; I < Idx; ++I)
1364       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1365     PrevIdx = Idx + 1;
1366     Fields.add(*DI);
1367     ++DI;
1368   }
1369 }
1370 
1371 template <class... As>
1372 static llvm::GlobalVariable *
1373 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1374                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1375                    As &&... Args) {
1376   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1377   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1378   ConstantInitBuilder CIBuilder(CGM);
1379   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1380   buildStructValue(Fields, CGM, RD, RL, Data);
1381   return Fields.finishAndCreateGlobal(
1382       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1383       std::forward<As>(Args)...);
1384 }
1385 
1386 template <typename T>
1387 static void
1388 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1389                                          ArrayRef<llvm::Constant *> Data,
1390                                          T &Parent) {
1391   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1392   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1393   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1394   buildStructValue(Fields, CGM, RD, RL, Data);
1395   Fields.finishAndAddTo(Parent);
1396 }
1397 
/// Create the "service" insertion-point marker for CGF's current function: a
/// no-op bitcast of undef that later service instructions are inserted
/// around. Placed at the builder's current block when \p AtCurrentPoint,
/// otherwise right after the function's alloca insertion point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  // A bitcast of undef to the same type is a harmless marker instruction.
  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1413 
1414 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1415   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1416   if (Elem.second.ServiceInsertPt) {
1417     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1418     Elem.second.ServiceInsertPt = nullptr;
1419     Ptr->eraseFromParent();
1420   }
1421 }
1422 
1423 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1424                                                   SourceLocation Loc,
1425                                                   SmallString<128> &Buffer) {
1426   llvm::raw_svector_ostream OS(Buffer);
1427   // Build debug location
1428   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1429   OS << ";" << PLoc.getFilename() << ";";
1430   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1431     OS << FD->getQualifiedNameAsString();
1432   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1433   return OS.str();
1434 }
1435 
1436 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1437                                                  SourceLocation Loc,
1438                                                  unsigned Flags) {
1439   llvm::Constant *SrcLocStr;
1440   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1441       Loc.isInvalid()) {
1442     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1443   } else {
1444     std::string FunctionName = "";
1445     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1446       FunctionName = FD->getQualifiedNameAsString();
1447     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1448     const char *FileName = PLoc.getFilename();
1449     unsigned Line = PLoc.getLine();
1450     unsigned Column = PLoc.getColumn();
1451     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1452                                                 Line, Column);
1453   }
1454   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1455   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1456                                      Reserved2Flags);
1457 }
1458 
1459 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1460                                           SourceLocation Loc) {
1461   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1462   // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1463   // the clang invariants used below might be broken.
1464   if (CGM.getLangOpts().OpenMPIRBuilder) {
1465     SmallString<128> Buffer;
1466     OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1467     auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1468         getIdentStringFromSourceLocation(CGF, Loc, Buffer));
1469     return OMPBuilder.getOrCreateThreadID(
1470         OMPBuilder.getOrCreateIdent(SrcLocStr));
1471   }
1472 
1473   llvm::Value *ThreadID = nullptr;
1474   // Check whether we've already cached a load of the thread id in this
1475   // function.
1476   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1477   if (I != OpenMPLocThreadIDMap.end()) {
1478     ThreadID = I->second.ThreadID;
1479     if (ThreadID != nullptr)
1480       return ThreadID;
1481   }
1482   // If exceptions are enabled, do not use parameter to avoid possible crash.
1483   if (auto *OMPRegionInfo =
1484           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1485     if (OMPRegionInfo->getThreadIDVariable()) {
1486       // Check if this an outlined function with thread id passed as argument.
1487       LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1488       llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1489       if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1490           !CGF.getLangOpts().CXXExceptions ||
1491           CGF.Builder.GetInsertBlock() == TopBlock ||
1492           !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1493           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1494               TopBlock ||
1495           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1496               CGF.Builder.GetInsertBlock()) {
1497         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1498         // If value loaded in entry block, cache it and use it everywhere in
1499         // function.
1500         if (CGF.Builder.GetInsertBlock() == TopBlock) {
1501           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1502           Elem.second.ThreadID = ThreadID;
1503         }
1504         return ThreadID;
1505       }
1506     }
1507   }
1508 
1509   // This is not an outlined function region - need to call __kmpc_int32
1510   // kmpc_global_thread_num(ident_t *loc).
1511   // Generate thread id value and cache this value for use across the
1512   // function.
1513   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1514   if (!Elem.second.ServiceInsertPt)
1515     setLocThreadIdInsertPt(CGF);
1516   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1517   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1518   llvm::CallInst *Call = CGF.Builder.CreateCall(
1519       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1520                                             OMPRTL___kmpc_global_thread_num),
1521       emitUpdateLocation(CGF, Loc));
1522   Call->setCallingConv(CGF.getRuntimeCC());
1523   Elem.second.ThreadID = Call;
1524   return Call;
1525 }
1526 
1527 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1528   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1529   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1530     clearLocThreadIdInsertPt(CGF);
1531     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1532   }
1533   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1534     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1535       UDRMap.erase(D);
1536     FunctionUDRMap.erase(CGF.CurFn);
1537   }
1538   auto I = FunctionUDMMap.find(CGF.CurFn);
1539   if (I != FunctionUDMMap.end()) {
1540     for(const auto *D : I->second)
1541       UDMMap.erase(D);
1542     FunctionUDMMap.erase(I);
1543   }
1544   LastprivateConditionalToTypes.erase(CGF.CurFn);
1545   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1546 }
1547 
// Returns the pointer-to-ident_t type maintained by the OpenMPIRBuilder,
// used for the 'loc' parameter of kmpc runtime entry points.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1551 
1552 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1553   if (!Kmpc_MicroTy) {
1554     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1555     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1556                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1557     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1558   }
1559   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1560 }
1561 
1562 llvm::FunctionCallee
1563 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1564   assert((IVSize == 32 || IVSize == 64) &&
1565          "IV size is not compatible with the omp runtime");
1566   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1567                                             : "__kmpc_for_static_init_4u")
1568                                 : (IVSigned ? "__kmpc_for_static_init_8"
1569                                             : "__kmpc_for_static_init_8u");
1570   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1571   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1572   llvm::Type *TypeParams[] = {
1573     getIdentTyPointerTy(),                     // loc
1574     CGM.Int32Ty,                               // tid
1575     CGM.Int32Ty,                               // schedtype
1576     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1577     PtrTy,                                     // p_lower
1578     PtrTy,                                     // p_upper
1579     PtrTy,                                     // p_stride
1580     ITy,                                       // incr
1581     ITy                                        // chunk
1582   };
1583   auto *FnTy =
1584       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1585   return CGM.CreateRuntimeFunction(FnTy, Name);
1586 }
1587 
1588 llvm::FunctionCallee
1589 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1590   assert((IVSize == 32 || IVSize == 64) &&
1591          "IV size is not compatible with the omp runtime");
1592   StringRef Name =
1593       IVSize == 32
1594           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1595           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1596   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1597   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1598                                CGM.Int32Ty,           // tid
1599                                CGM.Int32Ty,           // schedtype
1600                                ITy,                   // lower
1601                                ITy,                   // upper
1602                                ITy,                   // stride
1603                                ITy                    // chunk
1604   };
1605   auto *FnTy =
1606       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1607   return CGM.CreateRuntimeFunction(FnTy, Name);
1608 }
1609 
1610 llvm::FunctionCallee
1611 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1612   assert((IVSize == 32 || IVSize == 64) &&
1613          "IV size is not compatible with the omp runtime");
1614   StringRef Name =
1615       IVSize == 32
1616           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1617           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1618   llvm::Type *TypeParams[] = {
1619       getIdentTyPointerTy(), // loc
1620       CGM.Int32Ty,           // tid
1621   };
1622   auto *FnTy =
1623       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1624   return CGM.CreateRuntimeFunction(FnTy, Name);
1625 }
1626 
1627 llvm::FunctionCallee
1628 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1629   assert((IVSize == 32 || IVSize == 64) &&
1630          "IV size is not compatible with the omp runtime");
1631   StringRef Name =
1632       IVSize == 32
1633           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1634           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1635   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1636   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1637   llvm::Type *TypeParams[] = {
1638     getIdentTyPointerTy(),                     // loc
1639     CGM.Int32Ty,                               // tid
1640     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1641     PtrTy,                                     // p_lower
1642     PtrTy,                                     // p_upper
1643     PtrTy                                      // p_stride
1644   };
1645   auto *FnTy =
1646       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1647   return CGM.CreateRuntimeFunction(FnTy, Name);
1648 }
1649 
1650 /// Obtain information that uniquely identifies a target entry. This
1651 /// consists of the file and device IDs as well as line number associated with
1652 /// the relevant entry source location.
1653 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1654                                      unsigned &DeviceID, unsigned &FileID,
1655                                      unsigned &LineNum) {
1656   SourceManager &SM = C.getSourceManager();
1657 
1658   // The loc should be always valid and have a file ID (the user cannot use
1659   // #pragma directives in macros)
1660 
1661   assert(Loc.isValid() && "Source location is expected to be always valid.");
1662 
1663   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1664   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1665 
1666   llvm::sys::fs::UniqueID ID;
1667   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1668     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1669     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1670     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1671       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1672           << PLoc.getFilename() << EC.message();
1673   }
1674 
1675   DeviceID = ID.getDevice();
1676   FileID = ID.getFile();
1677   LineNum = PLoc.getLine();
1678 }
1679 
// Returns the address through which a 'declare target' variable must be
// accessed, creating a weak "_decl_tgt_ref_ptr" indirection pointer on first
// use; returns an invalid Address when no indirection is required.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // -fopenmp-simd: no device codegen, so no reference pointer is needed.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Only 'declare target link' variables, or 'to' variables when unified
  // shared memory is required, are accessed through the indirection pointer.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // For internal symbols, mix the file's unique ID into the name so the
        // pointer does not collide across translation units.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    // Create the pointer global lazily on first reference.
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // Only the host side receives a static initializer pointing at the
      // variable itself; on the device the pointer is left for the runtime
      // to populate (presumably at image load — confirm against libomptarget).
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1718 
1719 llvm::Constant *
1720 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1721   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1722          !CGM.getContext().getTargetInfo().isTLSSupported());
1723   // Lookup the entry, lazily creating it if necessary.
1724   std::string Suffix = getName({"cache", ""});
1725   return getOrCreateInternalVariable(
1726       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1727 }
1728 
1729 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1730                                                 const VarDecl *VD,
1731                                                 Address VDAddr,
1732                                                 SourceLocation Loc) {
1733   if (CGM.getLangOpts().OpenMPUseTLS &&
1734       CGM.getContext().getTargetInfo().isTLSSupported())
1735     return VDAddr;
1736 
1737   llvm::Type *VarTy = VDAddr.getElementType();
1738   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1739                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1740                                                        CGM.Int8PtrTy),
1741                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1742                          getOrCreateThreadPrivateCache(VD)};
1743   return Address(CGF.EmitRuntimeCall(
1744                      OMPBuilder.getOrCreateRuntimeFunction(
1745                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1746                      Args),
1747                  VDAddr.getAlignment());
1748 }
1749 
1750 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1751     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1752     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1753   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1754   // library.
1755   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1756   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1757                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1758                       OMPLoc);
1759   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1760   // to register constructor/destructor for variable.
1761   llvm::Value *Args[] = {
1762       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1763       Ctor, CopyCtor, Dtor};
1764   CGF.EmitRuntimeCall(
1765       OMPBuilder.getOrCreateRuntimeFunction(
1766           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1767       Args);
1768 }
1769 
/// Emits ctor/dtor helper functions for a threadprivate variable (when TLS is
/// not usable) and registers them with the runtime.
/// \param VD          The threadprivate variable; its definition is used.
/// \param VDAddr      Address of the original (master) copy of the variable.
/// \param Loc         Location attached to the generated code.
/// \param PerformInit Whether the declaration's initializer must be re-run
///                    for each thread's copy.
/// \param CGF         If non-null, the registration is emitted into this
///                    function; otherwise a standalone init function is
///                    created and returned for the caller to schedule.
/// \return The generated "__omp_threadprivate_init_" function when \p CGF is
///         null and a ctor or dtor was needed; nullptr in all other cases.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With usable TLS there is nothing to register.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit the helpers only once per variable definition.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The runtime hands the ctor the thread's copy as a single void*.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer argument and retype it to the variable's
      // memory type before running the initializer into it.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the same pointer it received.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Like the ctor, the dtor receives the thread's copy as a void*.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing ctor/dtor slots are filled with typed null pointers, which the
    // registration call expects.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No active function: wrap the registration in a standalone init
      // function and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1889 
/// Emits and registers offload "ctor"/"dtor" entries for a 'declare target'
/// global variable: on the device a real init/cleanup function is generated,
/// on the host a dummy byte global serves as the entry's address/ID.
/// \return CGM.getLangOpts().OpenMPIsDevice on all handled paths (false only
///         when there are no offload targets at all) — presumably a signal to
///         the caller about skipping regular host emission; confirm at the
///         call site.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do without offload targets and outside device compilation.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  // 'link' variables (and 'to' under unified shared memory) are handled via
  // the reference-pointer mechanism, not ctor/dtor entries.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the entries only once per variable definition.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive even though nothing in the module references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: a private dummy byte stands in for the device ctor; its
      // address serves as the offload entry's ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive even though nothing in the module references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: dummy byte global, mirroring the ctor case above.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2004 
2005 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2006                                                           QualType VarType,
2007                                                           StringRef Name) {
2008   std::string Suffix = getName({"artificial", ""});
2009   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2010   llvm::Value *GAddr =
2011       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2012   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
2013       CGM.getTarget().isTLSSupported()) {
2014     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
2015     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
2016   }
2017   std::string CacheSuffix = getName({"cache", ""});
2018   llvm::Value *Args[] = {
2019       emitUpdateLocation(CGF, SourceLocation()),
2020       getThreadID(CGF, SourceLocation()),
2021       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2022       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2023                                 /*isSigned=*/false),
2024       getOrCreateInternalVariable(
2025           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2026   return Address(
2027       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2028           CGF.EmitRuntimeCall(
2029               OMPBuilder.getOrCreateRuntimeFunction(
2030                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2031               Args),
2032           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2033       CGM.getContext().getTypeAlignInChars(VarType));
2034 }
2035 
2036 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2037                                    const RegionCodeGenTy &ThenGen,
2038                                    const RegionCodeGenTy &ElseGen) {
2039   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2040 
2041   // If the condition constant folds and can be elided, try to avoid emitting
2042   // the condition and the dead arm of the if/else.
2043   bool CondConstant;
2044   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2045     if (CondConstant)
2046       ThenGen(CGF);
2047     else
2048       ElseGen(CGF);
2049     return;
2050   }
2051 
2052   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2053   // emit the conditional branch.
2054   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2055   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2056   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2057   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2058 
2059   // Emit the 'then' code.
2060   CGF.EmitBlock(ThenBlock);
2061   ThenGen(CGF);
2062   CGF.EmitBranch(ContBlock);
2063   // Emit the 'else' code if present.
2064   // There is no need to emit line number for unconditional branch.
2065   (void)ApplyDebugLocation::CreateEmpty(CGF);
2066   CGF.EmitBlock(ElseBlock);
2067   ElseGen(CGF);
2068   // There is no need to emit line number for unconditional branch.
2069   (void)ApplyDebugLocation::CreateEmpty(CGF);
2070   CGF.EmitBranch(ContBlock);
2071   // Emit the continuation block for code after the if.
2072   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2073 }
2074 
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  // Emit a 'parallel' region: either a __kmpc_fork_call that runs OutlinedFn
  // on a team (ThenGen), or — when an if() clause is false — a serialized
  // parallel region executed by the encountering thread (ElseGen).
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    // The captured variables are passed varargs-style after the microtask.
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call but the ones called in serialized
    // regions could be inlined. This is not perfect but it is closer to the
    // invariant we want, namely, every data environment starts with a new
    // function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With an if() clause, select between the two code paths at runtime (or
  // fold a constant condition); without one, always fork.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2144 
2145 // If we're inside an (outlined) parallel region, use the region info's
2146 // thread-ID variable (it is passed in a first argument of the outlined function
2147 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2148 // regular serial code region, get thread ID by calling kmp_int32
2149 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2150 // return the address of that temp.
2151 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2152                                              SourceLocation Loc) {
2153   if (auto *OMPRegionInfo =
2154           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2155     if (OMPRegionInfo->getThreadIDVariable())
2156       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2157 
2158   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2159   QualType Int32Ty =
2160       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2161   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2162   CGF.EmitStoreOfScalar(ThreadID,
2163                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2164 
2165   return ThreadIDTemp;
2166 }
2167 
2168 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2169     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2170   SmallString<256> Buffer;
2171   llvm::raw_svector_ostream Out(Buffer);
2172   Out << Name;
2173   StringRef RuntimeName = Out.str();
2174   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2175   if (Elem.second) {
2176     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2177            "OMP internal variable has different type than requested");
2178     return &*Elem.second;
2179   }
2180 
2181   return Elem.second = new llvm::GlobalVariable(
2182              CGM.getModule(), Ty, /*IsConstant*/ false,
2183              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2184              Elem.first(), /*InsertBefore=*/nullptr,
2185              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2186 }
2187 
2188 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2189   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2190   std::string Name = getName({Prefix, "var"});
2191   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2192 }
2193 
2194 namespace {
2195 /// Common pre(post)-action for different OpenMP constructs.
2196 class CommonActionTy final : public PrePostActionTy {
2197   llvm::FunctionCallee EnterCallee;
2198   ArrayRef<llvm::Value *> EnterArgs;
2199   llvm::FunctionCallee ExitCallee;
2200   ArrayRef<llvm::Value *> ExitArgs;
2201   bool Conditional;
2202   llvm::BasicBlock *ContBlock = nullptr;
2203 
2204 public:
2205   CommonActionTy(llvm::FunctionCallee EnterCallee,
2206                  ArrayRef<llvm::Value *> EnterArgs,
2207                  llvm::FunctionCallee ExitCallee,
2208                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2209       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2210         ExitArgs(ExitArgs), Conditional(Conditional) {}
2211   void Enter(CodeGenFunction &CGF) override {
2212     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2213     if (Conditional) {
2214       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2215       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2216       ContBlock = CGF.createBasicBlock("omp_if.end");
2217       // Generate the branch (If-stmt)
2218       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2219       CGF.EmitBlock(ThenBlock);
2220     }
2221   }
2222   void Done(CodeGenFunction &CGF) {
2223     // Emit the rest of blocks/branches
2224     CGF.EmitBranch(ContBlock);
2225     CGF.EmitBlock(ContBlock, true);
2226   }
2227   void Exit(CodeGenFunction &CGF) override {
2228     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2229   }
2230 };
2231 } // anonymous namespace
2232 
2233 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2234                                          StringRef CriticalName,
2235                                          const RegionCodeGenTy &CriticalOpGen,
2236                                          SourceLocation Loc, const Expr *Hint) {
2237   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2238   // CriticalOpGen();
2239   // __kmpc_end_critical(ident_t *, gtid, Lock);
2240   // Prepare arguments and build a call to __kmpc_critical
2241   if (!CGF.HaveInsertPoint())
2242     return;
2243   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2244                          getCriticalRegionLock(CriticalName)};
2245   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2246                                                 std::end(Args));
2247   if (Hint) {
2248     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2249         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2250   }
2251   CommonActionTy Action(
2252       OMPBuilder.getOrCreateRuntimeFunction(
2253           CGM.getModule(),
2254           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2255       EnterArgs,
2256       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2257                                             OMPRTL___kmpc_end_critical),
2258       Args);
2259   CriticalOpGen.setAction(Action);
2260   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2261 }
2262 
2263 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2264                                        const RegionCodeGenTy &MasterOpGen,
2265                                        SourceLocation Loc) {
2266   if (!CGF.HaveInsertPoint())
2267     return;
2268   // if(__kmpc_master(ident_t *, gtid)) {
2269   //   MasterOpGen();
2270   //   __kmpc_end_master(ident_t *, gtid);
2271   // }
2272   // Prepare arguments and build a call to __kmpc_master
2273   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2274   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2275                             CGM.getModule(), OMPRTL___kmpc_master),
2276                         Args,
2277                         OMPBuilder.getOrCreateRuntimeFunction(
2278                             CGM.getModule(), OMPRTL___kmpc_end_master),
2279                         Args,
2280                         /*Conditional=*/true);
2281   MasterOpGen.setAction(Action);
2282   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2283   Action.Done(CGF);
2284 }
2285 
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked.
  // Without a filter() clause the filter defaults to thread 0.
  // NOTE(review): the second argument to EmitScalarExpr is CGF.Int32Ty; if
  // the overload in use takes `bool IgnoreResultAssign`, the type pointer
  // converts silently to `true` — confirm against CodeGenFunction's API.
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}
2314 
2315 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2316                                         SourceLocation Loc) {
2317   if (!CGF.HaveInsertPoint())
2318     return;
2319   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2320     OMPBuilder.createTaskyield(CGF.Builder);
2321   } else {
2322     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2323     llvm::Value *Args[] = {
2324         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2325         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2326     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2327                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2328                         Args);
2329   }
2330 
2331   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2332     Region->emitUntiedSwitch(CGF);
2333 }
2334 
2335 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2336                                           const RegionCodeGenTy &TaskgroupOpGen,
2337                                           SourceLocation Loc) {
2338   if (!CGF.HaveInsertPoint())
2339     return;
2340   // __kmpc_taskgroup(ident_t *, gtid);
2341   // TaskgroupOpGen();
2342   // __kmpc_end_taskgroup(ident_t *, gtid);
2343   // Prepare arguments and build a call to __kmpc_taskgroup
2344   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2345   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2346                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2347                         Args,
2348                         OMPBuilder.getOrCreateRuntimeFunction(
2349                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2350                         Args);
2351   TaskgroupOpGen.setAction(Action);
2352   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2353 }
2354 
2355 /// Given an array of pointers to variables, project the address of a
2356 /// given variable.
2357 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2358                                       unsigned Index, const VarDecl *Var) {
2359   // Pull out the pointer to the variable.
2360   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2361   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2362 
2363   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2364   Addr = CGF.Builder.CreateElementBitCast(
2365       Addr, CGF.ConvertTypeForMem(Var->getType()));
2366   return Addr;
2367 }
2368 
/// Build an internal function "void .omp.copyprivate.copy_func(void *LHSArg,
/// void *RHSArg)" that performs the per-variable copyprivate assignments.
/// Both arguments are arrays of void* (of type ArgsType) pointing at the
/// destination (LHS) and source (RHS) variables; the Ith assignment is
/// performed with AssignmentOps[I] between DestExprs[I] and SrcExprs[I].
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Cast the opaque void* parameters back to arrays of void*:
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // Emit one assignment per copyprivate variable:
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2422 
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // Emits:
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // did_it is only needed with copyprivate: it tells the runtime which thread
  // actually executed the single region (and therefore holds the values).
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the conditional region, i.e. only on the
    // thread that executed the single region)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs lands in the helper's DestExprs slot and
    // DstExprs in its SrcExprs slot — presumably this matches how Sema builds
    // the copyprivate pseudo-variables; confirm before changing either side.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2510 
2511 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2512                                         const RegionCodeGenTy &OrderedOpGen,
2513                                         SourceLocation Loc, bool IsThreads) {
2514   if (!CGF.HaveInsertPoint())
2515     return;
2516   // __kmpc_ordered(ident_t *, gtid);
2517   // OrderedOpGen();
2518   // __kmpc_end_ordered(ident_t *, gtid);
2519   // Prepare arguments and build a call to __kmpc_ordered
2520   if (IsThreads) {
2521     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2522     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2523                               CGM.getModule(), OMPRTL___kmpc_ordered),
2524                           Args,
2525                           OMPBuilder.getOrCreateRuntimeFunction(
2526                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2527                           Args);
2528     OrderedOpGen.setAction(Action);
2529     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2530     return;
2531   }
2532   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2533 }
2534 
2535 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2536   unsigned Flags;
2537   if (Kind == OMPD_for)
2538     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2539   else if (Kind == OMPD_sections)
2540     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2541   else if (Kind == OMPD_single)
2542     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2543   else if (Kind == OMPD_barrier)
2544     Flags = OMP_IDENT_BARRIER_EXPL;
2545   else
2546     Flags = OMP_IDENT_BARRIER_IMPL;
2547   return Flags;
2548 }
2549 
2550 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2551     CodeGenFunction &CGF, const OMPLoopDirective &S,
2552     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2553   // Check if the loop directive is actually a doacross loop directive. In this
2554   // case choose static, 1 schedule.
2555   if (llvm::any_of(
2556           S.getClausesOfKind<OMPOrderedClause>(),
2557           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2558     ScheduleKind = OMPC_SCHEDULE_static;
2559     // Chunk size is 1 in this case.
2560     llvm::APInt ChunkSize(32, 1);
2561     ChunkExpr = IntegerLiteral::Create(
2562         CGF.getContext(), ChunkSize,
2563         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2564         SourceLocation());
2565   }
2566 }
2567 
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    // Inside a cancellable region the barrier is also a cancellation point:
    // use __kmpc_cancel_barrier, and when checks are requested, leave the
    // construct if it reports a pending cancellation.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Plain (non-cancellable) barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2617 
2618 /// Map the OpenMP loop schedule to the runtime enumeration.
2619 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2620                                           bool Chunked, bool Ordered) {
2621   switch (ScheduleKind) {
2622   case OMPC_SCHEDULE_static:
2623     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2624                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2625   case OMPC_SCHEDULE_dynamic:
2626     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2627   case OMPC_SCHEDULE_guided:
2628     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2629   case OMPC_SCHEDULE_runtime:
2630     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2631   case OMPC_SCHEDULE_auto:
2632     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2633   case OMPC_SCHEDULE_unknown:
2634     assert(!Chunked && "chunk was specified but schedule kind not known");
2635     return Ordered ? OMP_ord_static : OMP_sch_static;
2636   }
2637   llvm_unreachable("Unexpected runtime schedule");
2638 }
2639 
2640 /// Map the OpenMP distribute schedule to the runtime enumeration.
2641 static OpenMPSchedType
2642 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2643   // only static is allowed for dist_schedule
2644   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2645 }
2646 
2647 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2648                                          bool Chunked) const {
2649   OpenMPSchedType Schedule =
2650       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2651   return Schedule == OMP_sch_static;
2652 }
2653 
2654 bool CGOpenMPRuntime::isStaticNonchunked(
2655     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2656   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2657   return Schedule == OMP_dist_sch_static;
2658 }
2659 
2660 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2661                                       bool Chunked) const {
2662   OpenMPSchedType Schedule =
2663       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2664   return Schedule == OMP_sch_static_chunked;
2665 }
2666 
2667 bool CGOpenMPRuntime::isStaticChunked(
2668     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2669   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2670   return Schedule == OMP_dist_sch_static_chunked;
2671 }
2672 
2673 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2674   OpenMPSchedType Schedule =
2675       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2676   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2677   return Schedule != OMP_sch_static;
2678 }
2679 
2680 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2681                                   OpenMPScheduleClauseModifier M1,
2682                                   OpenMPScheduleClauseModifier M2) {
2683   int Modifier = 0;
2684   switch (M1) {
2685   case OMPC_SCHEDULE_MODIFIER_monotonic:
2686     Modifier = OMP_sch_modifier_monotonic;
2687     break;
2688   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2689     Modifier = OMP_sch_modifier_nonmonotonic;
2690     break;
2691   case OMPC_SCHEDULE_MODIFIER_simd:
2692     if (Schedule == OMP_sch_static_chunked)
2693       Schedule = OMP_sch_static_balanced_chunked;
2694     break;
2695   case OMPC_SCHEDULE_MODIFIER_last:
2696   case OMPC_SCHEDULE_MODIFIER_unknown:
2697     break;
2698   }
2699   switch (M2) {
2700   case OMPC_SCHEDULE_MODIFIER_monotonic:
2701     Modifier = OMP_sch_modifier_monotonic;
2702     break;
2703   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2704     Modifier = OMP_sch_modifier_nonmonotonic;
2705     break;
2706   case OMPC_SCHEDULE_MODIFIER_simd:
2707     if (Schedule == OMP_sch_static_chunked)
2708       Schedule = OMP_sch_static_balanced_chunked;
2709     break;
2710   case OMPC_SCHEDULE_MODIFIER_last:
2711   case OMPC_SCHEDULE_MODIFIER_unknown:
2712     break;
2713   }
2714   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2715   // If the static schedule kind is specified or if the ordered clause is
2716   // specified, and if the nonmonotonic modifier is not specified, the effect is
2717   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2718   // modifier is specified, the effect is as if the nonmonotonic modifier is
2719   // specified.
2720   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2721     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2722           Schedule == OMP_sch_static_balanced_chunked ||
2723           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2724           Schedule == OMP_dist_sch_static_chunked ||
2725           Schedule == OMP_dist_sch_static))
2726       Modifier = OMP_sch_modifier_nonmonotonic;
2727   }
2728   return Schedule | Modifier;
2729 }
2730 
2731 void CGOpenMPRuntime::emitForDispatchInit(
2732     CodeGenFunction &CGF, SourceLocation Loc,
2733     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2734     bool Ordered, const DispatchRTInput &DispatchValues) {
2735   if (!CGF.HaveInsertPoint())
2736     return;
2737   OpenMPSchedType Schedule = getRuntimeSchedule(
2738       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2739   assert(Ordered ||
2740          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2741           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2742           Schedule != OMP_sch_static_balanced_chunked));
2743   // Call __kmpc_dispatch_init(
2744   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2745   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2746   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2747 
2748   // If the Chunk was not specified in the clause - use default value 1.
2749   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2750                                             : CGF.Builder.getIntN(IVSize, 1);
2751   llvm::Value *Args[] = {
2752       emitUpdateLocation(CGF, Loc),
2753       getThreadID(CGF, Loc),
2754       CGF.Builder.getInt32(addMonoNonMonoModifier(
2755           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2756       DispatchValues.LB,                                     // Lower
2757       DispatchValues.UB,                                     // Upper
2758       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2759       Chunk                                                  // Chunk
2760   };
2761   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2762 }
2763 
2764 static void emitForStaticInitCall(
2765     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2766     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2767     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2768     const CGOpenMPRuntime::StaticRTInput &Values) {
2769   if (!CGF.HaveInsertPoint())
2770     return;
2771 
2772   assert(!Values.Ordered);
2773   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2774          Schedule == OMP_sch_static_balanced_chunked ||
2775          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2776          Schedule == OMP_dist_sch_static ||
2777          Schedule == OMP_dist_sch_static_chunked);
2778 
2779   // Call __kmpc_for_static_init(
2780   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2781   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2782   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2783   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2784   llvm::Value *Chunk = Values.Chunk;
2785   if (Chunk == nullptr) {
2786     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2787             Schedule == OMP_dist_sch_static) &&
2788            "expected static non-chunked schedule");
2789     // If the Chunk was not specified in the clause - use default value 1.
2790     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2791   } else {
2792     assert((Schedule == OMP_sch_static_chunked ||
2793             Schedule == OMP_sch_static_balanced_chunked ||
2794             Schedule == OMP_ord_static_chunked ||
2795             Schedule == OMP_dist_sch_static_chunked) &&
2796            "expected static chunked schedule");
2797   }
2798   llvm::Value *Args[] = {
2799       UpdateLocation,
2800       ThreadId,
2801       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2802                                                   M2)), // Schedule type
2803       Values.IL.getPointer(),                           // &isLastIter
2804       Values.LB.getPointer(),                           // &LB
2805       Values.UB.getPointer(),                           // &UB
2806       Values.ST.getPointer(),                           // &Stride
2807       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2808       Chunk                                             // Chunk
2809   };
2810   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2811 }
2812 
2813 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2814                                         SourceLocation Loc,
2815                                         OpenMPDirectiveKind DKind,
2816                                         const OpenMPScheduleTy &ScheduleKind,
2817                                         const StaticRTInput &Values) {
2818   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2819       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2820   assert(isOpenMPWorksharingDirective(DKind) &&
2821          "Expected loop-based or sections-based directive.");
2822   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2823                                              isOpenMPLoopDirective(DKind)
2824                                                  ? OMP_IDENT_WORK_LOOP
2825                                                  : OMP_IDENT_WORK_SECTIONS);
2826   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2827   llvm::FunctionCallee StaticInitFunction =
2828       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2829   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2830   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2831                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2832 }
2833 
2834 void CGOpenMPRuntime::emitDistributeStaticInit(
2835     CodeGenFunction &CGF, SourceLocation Loc,
2836     OpenMPDistScheduleClauseKind SchedKind,
2837     const CGOpenMPRuntime::StaticRTInput &Values) {
2838   OpenMPSchedType ScheduleNum =
2839       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2840   llvm::Value *UpdatedLocation =
2841       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2842   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2843   llvm::FunctionCallee StaticInitFunction =
2844       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2845   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2846                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2847                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2848 }
2849 
2850 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2851                                           SourceLocation Loc,
2852                                           OpenMPDirectiveKind DKind) {
2853   if (!CGF.HaveInsertPoint())
2854     return;
2855   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2856   llvm::Value *Args[] = {
2857       emitUpdateLocation(CGF, Loc,
2858                          isOpenMPDistributeDirective(DKind)
2859                              ? OMP_IDENT_WORK_DISTRIBUTE
2860                              : isOpenMPLoopDirective(DKind)
2861                                    ? OMP_IDENT_WORK_LOOP
2862                                    : OMP_IDENT_WORK_SECTIONS),
2863       getThreadID(CGF, Loc)};
2864   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2865   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2866                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2867                       Args);
2868 }
2869 
2870 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2871                                                  SourceLocation Loc,
2872                                                  unsigned IVSize,
2873                                                  bool IVSigned) {
2874   if (!CGF.HaveInsertPoint())
2875     return;
2876   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2877   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2878   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2879 }
2880 
2881 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2882                                           SourceLocation Loc, unsigned IVSize,
2883                                           bool IVSigned, Address IL,
2884                                           Address LB, Address UB,
2885                                           Address ST) {
2886   // Call __kmpc_dispatch_next(
2887   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2888   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2889   //          kmp_int[32|64] *p_stride);
2890   llvm::Value *Args[] = {
2891       emitUpdateLocation(CGF, Loc),
2892       getThreadID(CGF, Loc),
2893       IL.getPointer(), // &isLastIter
2894       LB.getPointer(), // &Lower
2895       UB.getPointer(), // &Upper
2896       ST.getPointer()  // &Stride
2897   };
2898   llvm::Value *Call =
2899       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2900   return CGF.EmitScalarConversion(
2901       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2902       CGF.getContext().BoolTy, Loc);
2903 }
2904 
2905 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2906                                            llvm::Value *NumThreads,
2907                                            SourceLocation Loc) {
2908   if (!CGF.HaveInsertPoint())
2909     return;
2910   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2911   llvm::Value *Args[] = {
2912       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2913       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2914   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2915                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2916                       Args);
2917 }
2918 
2919 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2920                                          ProcBindKind ProcBind,
2921                                          SourceLocation Loc) {
2922   if (!CGF.HaveInsertPoint())
2923     return;
2924   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2925   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2926   llvm::Value *Args[] = {
2927       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2928       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2929   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2930                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2931                       Args);
2932 }
2933 
2934 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2935                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2936   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2937     OMPBuilder.createFlush(CGF.Builder);
2938   } else {
2939     if (!CGF.HaveInsertPoint())
2940       return;
2941     // Build call void __kmpc_flush(ident_t *loc)
2942     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2943                             CGM.getModule(), OMPRTL___kmpc_flush),
2944                         emitUpdateLocation(CGF, Loc));
2945   }
2946 }
2947 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): these ordinals index into the kmp_task_t record built by
/// this file's task codegen; the order must stay in sync with the runtime's
/// kmp_task_t layout -- do not reorder.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2973 
2974 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2975   return OffloadEntriesTargetRegion.empty() &&
2976          OffloadEntriesDeviceGlobalVar.empty();
2977 }
2978 
/// Initialize target region entry. Device-side only: reserves a slot (keyed
/// by device/file/parent-function/line) for an entry read from the host IR
/// metadata (see loadOffloadInfoMetadata); address and ID are filled in
/// later by registerTargetRegionEntryInfo.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // Nested-map indexing default-constructs any missing intermediate maps.
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}
2992 
/// Register a target region entry with its address/ID/flags. On the device,
/// fills in a slot previously created by initializeTargetRegionEntryInfo;
/// on the host, creates a brand-new entry with the next ordinal number.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      return;
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host: silently ignore a duplicate registration of a plain target
    // region entry (IgnoreAddressId matches the entry even if it already
    // carries an address/ID).
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3022 
3023 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3024     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3025     bool IgnoreAddressId) const {
3026   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3027   if (PerDevice == OffloadEntriesTargetRegion.end())
3028     return false;
3029   auto PerFile = PerDevice->second.find(FileID);
3030   if (PerFile == PerDevice->second.end())
3031     return false;
3032   auto PerParentName = PerFile->second.find(ParentName);
3033   if (PerParentName == PerFile->second.end())
3034     return false;
3035   auto PerLine = PerParentName->second.find(LineNum);
3036   if (PerLine == PerParentName->second.end())
3037     return false;
3038   // Fail if this entry is already registered.
3039   if (!IgnoreAddressId &&
3040       (PerLine->second.getAddress() || PerLine->second.getID()))
3041     return false;
3042   return true;
3043 }
3044 
3045 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3046     const OffloadTargetRegionEntryInfoActTy &Action) {
3047   // Scan all target region entries and perform the provided action.
3048   for (const auto &D : OffloadEntriesTargetRegion)
3049     for (const auto &F : D.second)
3050       for (const auto &P : F.second)
3051         for (const auto &L : P.second)
3052           Action(D.first, F.first, P.first(), L.first, L.second);
3053 }
3054 
/// Initialize a device global variable entry. Device-side only: reserves a
/// slot for an entry read from the host IR metadata (see
/// loadOffloadInfoMetadata); the address/size/linkage are filled in later
/// by registerDeviceGlobalVarEntryInfo.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // try_emplace: a pre-existing entry for this name is left untouched.
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3065 
3066 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3067     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3068                                      CharUnits VarSize,
3069                                      OMPTargetGlobalVarEntryKind Flags,
3070                                      llvm::GlobalValue::LinkageTypes Linkage) {
3071   if (CGM.getLangOpts().OpenMPIsDevice) {
3072     // This could happen if the device compilation is invoked standalone.
3073     if (!hasDeviceGlobalVarEntryInfo(VarName))
3074       return;
3075     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3076     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3077       if (Entry.getVarSize().isZero()) {
3078         Entry.setVarSize(VarSize);
3079         Entry.setLinkage(Linkage);
3080       }
3081       return;
3082     }
3083     Entry.setVarSize(VarSize);
3084     Entry.setLinkage(Linkage);
3085     Entry.setAddress(Addr);
3086   } else {
3087     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3088       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3089       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3090              "Entry not initialized!");
3091       if (Entry.getVarSize().isZero()) {
3092         Entry.setVarSize(VarSize);
3093         Entry.setLinkage(Linkage);
3094       }
3095       return;
3096     }
3097     OffloadEntriesDeviceGlobalVar.try_emplace(
3098         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3099     ++OffloadingEntriesNum;
3100   }
3101 }
3102 
3103 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3104     actOnDeviceGlobalVarEntriesInfo(
3105         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3106   // Scan all target region entries and perform the provided action.
3107   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3108     Action(E.getKey(), E.getValue());
3109 }
3110 
3111 void CGOpenMPRuntime::createOffloadEntry(
3112     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3113     llvm::GlobalValue::LinkageTypes Linkage) {
3114   StringRef Name = Addr->getName();
3115   llvm::Module &M = CGM.getModule();
3116   llvm::LLVMContext &C = M.getContext();
3117 
3118   // Create constant string with the name.
3119   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3120 
3121   std::string StringName = getName({"omp_offloading", "entry_name"});
3122   auto *Str = new llvm::GlobalVariable(
3123       M, StrPtrInit->getType(), /*isConstant=*/true,
3124       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3125   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3126 
3127   llvm::Constant *Data[] = {
3128       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3129       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3130       llvm::ConstantInt::get(CGM.SizeTy, Size),
3131       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3132       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3133   std::string EntryName = getName({"omp_offloading", "entry", ""});
3134   llvm::GlobalVariable *Entry = createGlobalStruct(
3135       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3136       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3137 
3138   // The entry has to be created in the section the linker expects it to be.
3139   Entry->setSection("omp_offloading_entries");
3140 }
3141 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.
  // (The device-side consumer is loadOffloadInfoMetadata below.)

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are collected indexed by their creation order so that host and
  // device sides agree on the ordinals.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a SourceLocation for diagnostics by matching the
        // device/file unique IDs against the source manager's file table.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Second pass: emit the actual __tgt_offload_entry descriptors (and
  // diagnose entries that were initialized but never properly registered).
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3315 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  // Only the device compilation consumes host IR metadata.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway context; only the named metadata is
  // read from it.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to read integer/string operands of the metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operand layout matches the
    // emitter in createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3384 
3385 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3386   if (!KmpRoutineEntryPtrTy) {
3387     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3388     ASTContext &C = CGM.getContext();
3389     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3390     FunctionProtoType::ExtProtoInfo EPI;
3391     KmpRoutineEntryPtrQTy = C.getPointerType(
3392         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3393     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3394   }
3395 }
3396 
3397 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3398   // Make sure the type of the entry is already created. This is the type we
3399   // have to create:
3400   // struct __tgt_offload_entry{
3401   //   void      *addr;       // Pointer to the offload entry info.
3402   //                          // (function or global)
3403   //   char      *name;       // Name of the function or global.
3404   //   size_t     size;       // Size of the entry info (0 if it a function).
3405   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3406   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3407   // };
3408   if (TgtOffloadEntryQTy.isNull()) {
3409     ASTContext &C = CGM.getContext();
3410     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3411     RD->startDefinition();
3412     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3413     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3414     addFieldToRecordDecl(C, RD, C.getSizeType());
3415     addFieldToRecordDecl(
3416         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3417     addFieldToRecordDecl(
3418         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3419     RD->completeDefinition();
3420     RD->addAttr(PackedAttr::CreateImplicit(C));
3421     TgtOffloadEntryQTy = C.getRecordType(RD);
3422   }
3423   return TgtOffloadEntryQTy;
3424 }
3425 
3426 namespace {
3427 struct PrivateHelpersTy {
3428   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3429                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3430       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3431         PrivateElemInit(PrivateElemInit) {}
3432   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3433   const Expr *OriginalRef = nullptr;
3434   const VarDecl *Original = nullptr;
3435   const VarDecl *PrivateCopy = nullptr;
3436   const VarDecl *PrivateElemInit = nullptr;
3437   bool isLocalPrivate() const {
3438     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3439   }
3440 };
3441 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3442 } // anonymous namespace
3443 
3444 static bool isAllocatableDecl(const VarDecl *VD) {
3445   const VarDecl *CVD = VD->getCanonicalDecl();
3446   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3447     return false;
3448   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3449   // Use the default allocation.
3450   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3451             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3452            !AA->getAllocator());
3453 }
3454 
3455 static RecordDecl *
3456 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3457   if (!Privates.empty()) {
3458     ASTContext &C = CGM.getContext();
3459     // Build struct .kmp_privates_t. {
3460     //         /*  private vars  */
3461     //       };
3462     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3463     RD->startDefinition();
3464     for (const auto &Pair : Privates) {
3465       const VarDecl *VD = Pair.second.Original;
3466       QualType Type = VD->getType().getNonReferenceType();
3467       // If the private variable is a local variable with lvalue ref type,
3468       // allocate the pointer instead of the pointee type.
3469       if (Pair.second.isLocalPrivate()) {
3470         if (VD->getType()->isLValueReferenceType())
3471           Type = C.getPointerType(Type);
3472         if (isAllocatableDecl(VD))
3473           Type = C.getPointerType(Type);
3474       }
3475       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3476       if (VD->hasAttrs()) {
3477         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3478              E(VD->getAttrs().end());
3479              I != E; ++I)
3480           FD->addAttr(*I);
3481       }
3482     }
3483     RD->completeDefinition();
3484     return RD;
3485   }
3486   return nullptr;
3487 }
3488 
3489 static RecordDecl *
3490 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3491                          QualType KmpInt32Ty,
3492                          QualType KmpRoutineEntryPointerQTy) {
3493   ASTContext &C = CGM.getContext();
3494   // Build struct kmp_task_t {
3495   //         void *              shareds;
3496   //         kmp_routine_entry_t routine;
3497   //         kmp_int32           part_id;
3498   //         kmp_cmplrdata_t data1;
3499   //         kmp_cmplrdata_t data2;
3500   // For taskloops additional fields:
3501   //         kmp_uint64          lb;
3502   //         kmp_uint64          ub;
3503   //         kmp_int64           st;
3504   //         kmp_int32           liter;
3505   //         void *              reductions;
3506   //       };
3507   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3508   UD->startDefinition();
3509   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3510   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3511   UD->completeDefinition();
3512   QualType KmpCmplrdataTy = C.getRecordType(UD);
3513   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3514   RD->startDefinition();
3515   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3516   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3517   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3518   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3519   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3520   if (isOpenMPTaskLoopDirective(Kind)) {
3521     QualType KmpUInt64Ty =
3522         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3523     QualType KmpInt64Ty =
3524         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3525     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3526     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3527     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3528     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3529     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3530   }
3531   RD->completeDefinition();
3532   return RD;
3533 }
3534 
3535 static RecordDecl *
3536 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3537                                      ArrayRef<PrivateDataTy> Privates) {
3538   ASTContext &C = CGM.getContext();
3539   // Build struct kmp_task_t_with_privates {
3540   //         kmp_task_t task_data;
3541   //         .kmp_privates_t. privates;
3542   //       };
3543   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3544   RD->startDefinition();
3545   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3546   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3547     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3548   RD->completeDefinition();
3549   return RD;
3550 }
3551 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Proxy signature: kmp_int32 (kmp_int32 gtid,
  //                             kmp_task_t_with_privates *restrict tt).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  // The proxy gets internal linkage and a uniqued ".omp_task_entry." name.
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference 'tt'; its first field is the kmp_task_t task data.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load tt->task_data.shareds and cast it to the expected shareds type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // &tt->privates if the wrapper record has a privates field, null otherwise.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to task and taskloop entries: gtid, &part_id, privates,
  // the mapping function, and the task descriptor itself as void*.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally pass lb, ub, st, liter and reductions loaded from
  // the task data.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0 to the runtime.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3666 
/// Emit a function that destroys all destructible fields of the privates
/// block of a task:
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
///                                 kmp_task_t_with_privates *tt) {
///   // destructors for tt->privates fields, in declaration order
/// }
/// \endcode
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Same signature shape as the task entry: (kmp_int32 gtid,
  // kmp_task_t_with_privates *restrict tt).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Dereference 'tt' and step to its second field — the privates block.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for every field whose type needs destruction.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3715 
3716 /// Emit a privates mapping function for correct handling of private and
3717 /// firstprivate variables.
3718 /// \code
3719 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3720 /// **noalias priv1,...,  <tyn> **noalias privn) {
3721 ///   *priv1 = &.privates.priv1;
3722 ///   ...;
3723 ///   *privn = &.privates.privn;
3724 /// }
3725 /// \endcode
3726 static llvm::Value *
3727 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3728                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3729                                ArrayRef<PrivateDataTy> Privates) {
3730   ASTContext &C = CGM.getContext();
3731   FunctionArgList Args;
3732   ImplicitParamDecl TaskPrivatesArg(
3733       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3734       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3735       ImplicitParamDecl::Other);
3736   Args.push_back(&TaskPrivatesArg);
3737   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3738   unsigned Counter = 1;
3739   for (const Expr *E : Data.PrivateVars) {
3740     Args.push_back(ImplicitParamDecl::Create(
3741         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3742         C.getPointerType(C.getPointerType(E->getType()))
3743             .withConst()
3744             .withRestrict(),
3745         ImplicitParamDecl::Other));
3746     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3747     PrivateVarsPos[VD] = Counter;
3748     ++Counter;
3749   }
3750   for (const Expr *E : Data.FirstprivateVars) {
3751     Args.push_back(ImplicitParamDecl::Create(
3752         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3753         C.getPointerType(C.getPointerType(E->getType()))
3754             .withConst()
3755             .withRestrict(),
3756         ImplicitParamDecl::Other));
3757     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3758     PrivateVarsPos[VD] = Counter;
3759     ++Counter;
3760   }
3761   for (const Expr *E : Data.LastprivateVars) {
3762     Args.push_back(ImplicitParamDecl::Create(
3763         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3764         C.getPointerType(C.getPointerType(E->getType()))
3765             .withConst()
3766             .withRestrict(),
3767         ImplicitParamDecl::Other));
3768     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3769     PrivateVarsPos[VD] = Counter;
3770     ++Counter;
3771   }
3772   for (const VarDecl *VD : Data.PrivateLocals) {
3773     QualType Ty = VD->getType().getNonReferenceType();
3774     if (VD->getType()->isLValueReferenceType())
3775       Ty = C.getPointerType(Ty);
3776     if (isAllocatableDecl(VD))
3777       Ty = C.getPointerType(Ty);
3778     Args.push_back(ImplicitParamDecl::Create(
3779         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3780         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3781         ImplicitParamDecl::Other));
3782     PrivateVarsPos[VD] = Counter;
3783     ++Counter;
3784   }
3785   const auto &TaskPrivatesMapFnInfo =
3786       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3787   llvm::FunctionType *TaskPrivatesMapTy =
3788       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3789   std::string Name =
3790       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3791   auto *TaskPrivatesMap = llvm::Function::Create(
3792       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3793       &CGM.getModule());
3794   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3795                                     TaskPrivatesMapFnInfo);
3796   if (CGM.getLangOpts().Optimize) {
3797     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3798     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3799     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3800   }
3801   CodeGenFunction CGF(CGM);
3802   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3803                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3804 
3805   // *privi = &.privates.privi;
3806   LValue Base = CGF.EmitLoadOfPointerLValue(
3807       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3808       TaskPrivatesArg.getType()->castAs<PointerType>());
3809   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3810   Counter = 0;
3811   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3812     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3813     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3814     LValue RefLVal =
3815         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3816     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3817         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3818     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3819     ++Counter;
3820   }
3821   CGF.FinishFunction();
3822   return TaskPrivatesMap;
3823 }
3824 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the shareds block of the (source) task;
///        may be invalid when no firstprivate copy-in is needed.
/// \param TDBase LValue of the kmp_task_t_with_privates descriptor.
/// \param ForDup true when emitting inside the task_dup function, false when
///        emitting at the task creation point.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates block is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the privates-record fields in lockstep with the Privates list.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the task_dup path (ForDup) only non-trivial constructor calls must
    // be re-emitted; everything else was handled at task creation.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          // Artificial target-data variable: use its local address directly.
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // In task_dup read the shared value from the source task's shareds.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda or block captures can be emitted directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: run the initializer with the source
          // element privatized to the shared value's address.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private copy (no element helper): just run its initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3946 
3947 /// Check if duplication function is required for taskloops.
3948 static bool checkInitIsRequired(CodeGenFunction &CGF,
3949                                 ArrayRef<PrivateDataTy> Privates) {
3950   bool InitRequired = false;
3951   for (const PrivateDataTy &Pair : Privates) {
3952     if (Pair.second.isLocalPrivate())
3953       continue;
3954     const VarDecl *VD = Pair.second.PrivateCopy;
3955     const Expr *Init = VD->getAnyInitializer();
3956     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3957                                     !CGF.isTrivialInitializer(Init));
3958     if (InitRequired)
3959       break;
3960   }
3961   return InitRequired;
3962 }
3963 
3964 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Signature: void (kmp_task_t_with_privates *task_dst,
  //                  kmp_task_t_with_privates *task_src, int lastpriv).
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Only firstprivates need the source task's shareds pointer; load it from
  // task_src in that case.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  // Re-run the non-trivial private initializers in the destination task.
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4043 
4044 /// Checks if destructor function is required to be generated.
4045 /// \return true if cleanups are required, false otherwise.
4046 static bool
4047 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4048                          ArrayRef<PrivateDataTy> Privates) {
4049   for (const PrivateDataTy &P : Privates) {
4050     if (P.second.isLocalPrivate())
4051       continue;
4052     QualType Ty = P.second.Original->getType().getNonReferenceType();
4053     if (Ty.isDestructedType())
4054       return true;
4055   }
4056   return false;
4057 }
4058 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII-style scope: the constructor privatizes the iterator variables and
/// their counters and emits the loop *headers* (counter init, condition
/// check, body entry, and the iterator update) for each iterator; the
/// destructor emits the matching loop *latches* (counter increment, back
/// branch, exit block) in reverse order. Code emitted between construction
/// and destruction therefore runs inside the innermost iterator loop body.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator jump destinations, filled by the constructor and consumed
  // by the destructor; index i corresponds to iterator i of E.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    // A null iterator expression makes this scope a no-op (no modifier case).
    if (!E)
      return;
    // Evaluate all upper bounds up front, before privatizing, so they are
    // computed in terms of the original (non-private) values.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Emit the head of one loop per iterator, nesting from outermost (I==0)
    // inward; the destructor closes them innermost-first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Use a signed or unsigned compare to match the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first, mirroring the constructor.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4137 
4138 static std::pair<llvm::Value *, llvm::Value *>
4139 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4140   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4141   llvm::Value *Addr;
4142   if (OASE) {
4143     const Expr *Base = OASE->getBase();
4144     Addr = CGF.EmitScalarExpr(Base);
4145   } else {
4146     Addr = CGF.EmitLValue(E).getPointer(CGF);
4147   }
4148   llvm::Value *SizeVal;
4149   QualType Ty = E->getType();
4150   if (OASE) {
4151     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4152     for (const Expr *SE : OASE->getDimensions()) {
4153       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4154       Sz = CGF.EmitScalarConversion(
4155           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4156       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4157     }
4158   } else if (const auto *ASE =
4159                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4160     LValue UpAddrLVal =
4161         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4162     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4163     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4164         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4165     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4166     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4167     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4168   } else {
4169     SizeVal = CGF.getTypeSize(Ty);
4170   }
4171   return std::make_pair(Addr, SizeVal);
4172 }
4173 
/// Builds kmp_task_affinity_info_t record type, if it is not built yet.
/// The record mirrors the runtime's affinity entry: { base_addr: intptr_t,
/// len: size_t, flags: uint32_t }.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
4188 
/// Emits the setup for an OpenMP task: collects the private copies, builds
/// the kmp_task_t-with-privates record, allocates the task object via the
/// runtime, copies the shared data into it, initializes the private copies,
/// and fills in the destructor / priority fields. Returns the pieces the
/// caller needs (the allocated task, the proxy entry point, etc.) to emit
/// the actual task-spawning runtime call.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally record the initializer helper declaration.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Descending alignment order minimizes padding in the generated privates
  // record; stable sort keeps declaration order among equal alignments.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). The record layout differs for
  // taskloop vs. plain task/target directives, so each is cached separately.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Build the helper that maps the task's privates record to pointers to the
  // individual private copies, or pass a null map if there are no privates.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final flag may be a runtime value (final clause with a non-constant
  // condition, stored as a pointer) or a compile-time constant (the int).
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // 'nowait' target tasks use the target-specific allocator, which takes an
  // additional device ID argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    // Store the returned event handle into the user's event variable.
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Iterator modifiers contribute a runtime-computed count (product of
    // iterator trip counts); plain list items contribute a static count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    // Allocate the array of affinity entries: a VLA if the count is only
    // known at runtime, otherwise a fixed-size temporary.
    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Iterator-driven entries need a runtime position counter, starting
    // after the statically-filled entries.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(),
                                          AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // Cast the allocated task to the generated kmp_task_t-with-privates type
  // to access its fields.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops additionally need a task-duplication helper when lastprivates
    // or non-trivially-initialized privates are present.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4579 
namespace {
/// Dependence kind for RTL. Values match the flag bits the OpenMP runtime
/// expects in the kmp_depend_info flags field (see translateDependencyKind
/// below for the clause-kind mapping).
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4590 
4591 /// Translates internal dependency kind into the runtime kind.
4592 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4593   RTLDependenceKindTy DepKind;
4594   switch (K) {
4595   case OMPC_DEPEND_in:
4596     DepKind = DepIn;
4597     break;
4598   // Out and InOut dependencies must use the same code.
4599   case OMPC_DEPEND_out:
4600   case OMPC_DEPEND_inout:
4601     DepKind = DepInOut;
4602     break;
4603   case OMPC_DEPEND_mutexinoutset:
4604     DepKind = DepMutexInOutSet;
4605     break;
4606   case OMPC_DEPEND_source:
4607   case OMPC_DEPEND_sink:
4608   case OMPC_DEPEND_depobj:
4609   case OMPC_DEPEND_unknown:
4610     llvm_unreachable("Unknown task dependence type");
4611   }
4612   return DepKind;
4613 }
4614 
4615 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4616 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4617                            QualType &FlagsTy) {
4618   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4619   if (KmpDependInfoTy.isNull()) {
4620     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4621     KmpDependInfoRD->startDefinition();
4622     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4623     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4624     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4625     KmpDependInfoRD->completeDefinition();
4626     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4627   }
4628 }
4629 
/// Given an lvalue for a depobj variable, returns the number of dependence
/// entries it holds and an lvalue for the start of its kmp_depend_info
/// array. The count is read from the element stored immediately *before*
/// the array (at index -1), in that element's base_addr field.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable holds a pointer; load it to reach the array.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one kmp_depend_info element to the hidden header entry.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4658 
/// Fills kmp_depend_info entries in \p DependenciesArray for every expression
/// in \p Data.DepExprs, starting at position \p Pos. The position is either a
/// compile-time index (unsigned*), advanced in place, or — when the depend
/// clause has an iterator modifier, so the count is only known at runtime —
/// an lvalue holding a runtime counter that is loaded and incremented per
/// entry. If an iterator modifier is present, the stores are emitted inside
/// the loops generated by OMPIteratorGeneratorScope.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Opens the iterator loops (no-op when there is no iterator modifier);
  // the loops are closed when the scope is destroyed at function exit.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Static position: index with a constant GEP.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime position: load the counter and index dynamically.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
                                        DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: bump the static index, or emit an increment of
    // the runtime counter.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4718 
/// For a depend clause of kind 'depobj', returns one runtime value per depobj
/// expression holding the number of dependence records stored in that depobj.
/// The count is read from the base_addr field of the hidden header record
/// that emitDepobjDependClause places at index -1 of each depobj array.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // Iterator scope ends at the closing brace; the loads of the size temps
    // below must happen outside it.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // The depobj variable holds a pointer to the records; load it and cast
      // to kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // Step back one record to reach the header element.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getElementType(), Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate the count into a zero-initialized temp so it survives the
      // iterator scope (under an iterator the expression may expand to
      // several depobjs).
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Re-load the accumulated sizes outside the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4776 
/// Copies the dependence records of every depobj in \p Data into
/// \p DependenciesArray with a runtime memcpy, starting at the runtime
/// position held in \p PosLVal and advancing it by the number of records
/// copied from each depobj.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // Byte size of one kmp_depend_info record, for the memcpy length.
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the depobj pointer and view it as kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      // The count lives in the header record at index -1.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getElementType(), Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
                                        DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4838 
4839 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4840     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4841     SourceLocation Loc) {
4842   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4843         return D.DepExprs.empty();
4844       }))
4845     return std::make_pair(nullptr, Address::invalid());
4846   // Process list of dependencies.
4847   ASTContext &C = CGM.getContext();
4848   Address DependenciesArray = Address::invalid();
4849   llvm::Value *NumOfElements = nullptr;
4850   unsigned NumDependencies = std::accumulate(
4851       Dependencies.begin(), Dependencies.end(), 0,
4852       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4853         return D.DepKind == OMPC_DEPEND_depobj
4854                    ? V
4855                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4856       });
4857   QualType FlagsTy;
4858   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4859   bool HasDepobjDeps = false;
4860   bool HasRegularWithIterators = false;
4861   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4862   llvm::Value *NumOfRegularWithIterators =
4863       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4864   // Calculate number of depobj dependecies and regular deps with the iterators.
4865   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4866     if (D.DepKind == OMPC_DEPEND_depobj) {
4867       SmallVector<llvm::Value *, 4> Sizes =
4868           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4869       for (llvm::Value *Size : Sizes) {
4870         NumOfDepobjElements =
4871             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4872       }
4873       HasDepobjDeps = true;
4874       continue;
4875     }
4876     // Include number of iterations, if any.
4877     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4878       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4879         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4880         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4881         NumOfRegularWithIterators =
4882             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4883       }
4884       HasRegularWithIterators = true;
4885       continue;
4886     }
4887   }
4888 
4889   QualType KmpDependInfoArrayTy;
4890   if (HasDepobjDeps || HasRegularWithIterators) {
4891     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4892                                            /*isSigned=*/false);
4893     if (HasDepobjDeps) {
4894       NumOfElements =
4895           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4896     }
4897     if (HasRegularWithIterators) {
4898       NumOfElements =
4899           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4900     }
4901     OpaqueValueExpr OVE(Loc,
4902                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4903                         VK_PRValue);
4904     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4905                                                   RValue::get(NumOfElements));
4906     KmpDependInfoArrayTy =
4907         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4908                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4909     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4910     // Properly emit variable-sized array.
4911     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4912                                          ImplicitParamDecl::Other);
4913     CGF.EmitVarDecl(*PD);
4914     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4915     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4916                                               /*isSigned=*/false);
4917   } else {
4918     KmpDependInfoArrayTy = C.getConstantArrayType(
4919         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4920         ArrayType::Normal, /*IndexTypeQuals=*/0);
4921     DependenciesArray =
4922         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4923     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4924     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4925                                            /*isSigned=*/false);
4926   }
4927   unsigned Pos = 0;
4928   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4929     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4930         Dependencies[I].IteratorExpr)
4931       continue;
4932     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4933                    DependenciesArray);
4934   }
4935   // Copy regular dependecies with iterators.
4936   LValue PosLVal = CGF.MakeAddrLValue(
4937       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4938   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4939   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4940     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4941         !Dependencies[I].IteratorExpr)
4942       continue;
4943     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4944                    DependenciesArray);
4945   }
4946   // Copy final depobj arrays without iterators.
4947   if (HasDepobjDeps) {
4948     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4949       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4950         continue;
4951       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4952                          DependenciesArray);
4953     }
4954   }
4955   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4956       DependenciesArray, CGF.VoidPtrTy);
4957   return std::make_pair(NumOfElements, DependenciesArray);
4958 }
4959 
/// Emits codegen for an 'omp depobj' construct: heap-allocates (via
/// __kmpc_alloc) an array of kmp_depend_info records with one extra header
/// record in front, stores the record count into the header's base_addr
/// field, fills the records, and returns the address of the first real
/// record (one past the header). The header count is what
/// emitDepobjElementsSizes/emitUpdateClause read back later.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Iterator modifier: record count = product of iterator trip counts,
    // computed at runtime.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the header record, then scale by the (aligned) record size.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static count: size the allocation as a constant array of count + 1.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Cursor starts at 1 to skip the header; with an iterator it must be a
  // runtime counter, otherwise a plain compile-time index is enough.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address just past the header record, as void*.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5042 
/// Emits codegen for 'omp depobj ... destroy': frees the depobj's dependence
/// array via __kmpc_free. The pointer handed to the runtime is stepped back
/// one record (index -1) because the allocation made by
/// emitDepobjDependClause starts at the hidden header record.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // Rewind to the true allocation start (the header record).
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
5069 
/// Emits codegen for 'omp depobj ... update(kind)': iterates over every
/// dependence record stored in the depobj and rewrites its flags field with
/// the runtime encoding of \p NewDepKind. The loop is emitted as a manual
/// do-while over record pointers with a PHI carrying the current element.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  // Recover {record count, first record} from the depobj header.
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI holds the element pointer for the current iteration; its second
  // incoming value is added below once the increment is emitted.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5116 
/// Emits codegen for an OpenMP task directive: allocates/initializes the
/// task via emitTaskInit, emits the dependence array (if any), then emits
/// either __kmpc_omp_task(_with_deps) (then-branch) or, when the 'if'
/// clause is false, the serialized sequence
/// __kmpc_omp_wait_deps + task_begin_if0 + direct call + task_complete_if0
/// (else-branch).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Then-branch: enqueue the task via the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks restart from part 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // Else-branch (if-clause false): execute the task body immediately in the
  // encountering thread, bracketed by begin_if0/complete_if0.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    // No if-clause: unconditionally take the then-branch.
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5234 
/// Emits codegen for an OpenMP taskloop directive: initializes the task via
/// emitTaskInit, stores the loop bounds/stride and reductions pointer into
/// the kmp_task_t, and emits the __kmpc_taskloop runtime call with the
/// grainsize/num_tasks scheduling encoding from \p Data.Schedule.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    // The runtime serializes the taskloop itself when if_val is 0.
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lb/ub/st fields of kmp_task_t from the directive's helper
  // variables; the runtime reads them to partition the iteration space.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Encoding of the 'sched' argument expected by __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5320 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr,EExpr,UpExpr Optional parts of an atomic update expression;
/// forwarded unchanged to \p RedOpGen on every element iteration.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely when the array is empty (begin == end).
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointers across
  // loop iterations; the second incoming value is added after the body.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so the
  // generated reduction operation applies to a single element.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5403 
5404 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5405 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5406 /// UDR combiner function.
5407 static void emitReductionCombiner(CodeGenFunction &CGF,
5408                                   const Expr *ReductionOp) {
5409   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5410     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5411       if (const auto *DRE =
5412               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5413         if (const auto *DRD =
5414                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5415           std::pair<llvm::Function *, llvm::Function *> Reduction =
5416               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5417           RValue Func = RValue::get(Reduction.first);
5418           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5419           CGF.EmitIgnoredExpr(ReductionOp);
5420           return;
5421         }
5422   CGF.EmitIgnoredExpr(ReductionOp);
5423 }
5424 
/// Emits the reduce_func used by the runtime to combine two lists of
/// reduction items:
/// \code
/// void reduction_func(void *LHSArg, void *RHSArg) {
///   ...
///   *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
///   ...
/// }
/// \endcode
/// \param ArgsType Pointer-to-array type both void* arguments are cast to.
/// \param Privates Private copies of the reduction items (used for their
/// types, including VLA handling).
/// \param LHSExprs,RHSExprs LHS/RHS variables referenced by \p ReductionOps.
/// \param ReductionOps Reduction operations, one per item.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // The function is internal to this module; give it a unique runtime name.
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS variable to the corresponding slot in the argument
  // arrays so that the reduction ops below read/write through the arguments.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // VLA items occupy an extra slot in the reduction list that holds the
      // dynamic size; map the VLA size expression onto that loaded value.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5516 
5517 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5518                                                   const Expr *ReductionOp,
5519                                                   const Expr *PrivateRef,
5520                                                   const DeclRefExpr *LHS,
5521                                                   const DeclRefExpr *RHS) {
5522   if (PrivateRef->getType()->isArrayType()) {
5523     // Emit reduction for array section.
5524     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5525     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5526     EmitOMPAggregateReduction(
5527         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5528         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5529           emitReductionCombiner(CGF, ReductionOp);
5530         });
5531   } else {
5532     // Emit reduction for array subscript or single variable.
5533     emitReductionCombiner(CGF, ReductionOp);
5534   }
5535 }
5536 
/// Emits code for an OpenMP reduction clause: either a simple per-item
/// combiner sequence, or the full __kmpc_reduce{_nowait} protocol with a
/// tree-reduce case (1) and an atomic/critical case (2).
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination needed: emit one combiner per item inline.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The size is smuggled through the void* slot via inttoptr; the
      // reduce_func reads it back with ptrtoint.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Emit one (non-atomic) combiner per reduction item; the runtime serializes
  // threads for this case.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Only reduction ops of the form 'x = <update>' are candidates for a
      // simple atomic update; anything else falls back to a critical region.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // When a plain atomic RMW is not possible, re-evaluate the
                // update expression with the LHS remapped to a temporary
                // holding the current value of X.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5843 
5844 /// Generates unique name for artificial threadprivate variables.
5845 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5846 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5847                                       const Expr *Ref) {
5848   SmallString<256> Buffer;
5849   llvm::raw_svector_ostream Out(Buffer);
5850   const clang::DeclRefExpr *DE;
5851   const VarDecl *D = ::getBaseDecl(Ref, DE);
5852   if (!D)
5853     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5854   D = D->getCanonicalDecl();
5855   std::string Name = CGM.getOpenMPRuntime().getName(
5856       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5857   Out << Prefix << Name << "_"
5858       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5859   return std::string(Out.str());
5860 }
5861 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No UDR initializer: the original-item lvalue is a null placeholder.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5930 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param ReductionOp Combiner expression for reduction item \p N.
/// \param LHS,RHS DeclRefExprs for the variables referenced by \p ReductionOp.
/// \param PrivateRef Reference to the private copy (used for its type).
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6008 
6009 /// Emits reduction finalizer function:
6010 /// \code
6011 /// void @.red_fini(void* %arg) {
6012 /// %0 = bitcast void* %arg to <type>*
6013 /// <destroy>(<type>* %0)
6014 /// ret void
6015 /// }
6016 /// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is required if the reduction item needs no cleanups
  // (e.g. trivially destructible data); the caller stores a null pointer
  // in the reduce_fini slot instead.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  // Single 'void *' parameter: pointer to the private copy to destroy.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Internal linkage: the function is only referenced through the
  // kmp_taskred_input_t record built by the caller.
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %0 = bitcast void* %arg to <type>*
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6057 
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to do without an insertion point or without reduction items.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  // Helper that knows how to emit type/init/combine/cleanup code for each
  // reduction item.
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one kmp_taskred_input_t element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    // ElemLVal.reduce_size = size;
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // The finalizer may be null if the item needs no cleanups.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    // Flag value 1 requests delayed creation of the item by the runtime (used
    // for VLAs/array sections, see DelayedCreation above).
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6186 
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  // is_ws: 1 for worksharing reductions, 0 otherwise.
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
6204 
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the size of the reduction item is
  // non-constant (Sizes.second != nullptr). The generated
  // initializer/combiner/finalizer functions reload the size from this
  // threadprivate variable, since the runtime cannot pass it to them.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}
6221 
6222 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6223                                               SourceLocation Loc,
6224                                               llvm::Value *ReductionsPtr,
6225                                               LValue SharedLVal) {
6226   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6227   // *d);
6228   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6229                                                    CGM.IntTy,
6230                                                    /*isSigned=*/true),
6231                          ReductionsPtr,
6232                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6233                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6234   return Address(
6235       CGF.EmitRuntimeCall(
6236           OMPBuilder.getOrCreateRuntimeFunction(
6237               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6238           Args),
6239       SharedLVal.getAlignment());
6240 }
6241 
6242 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6243                                        SourceLocation Loc) {
6244   if (!CGF.HaveInsertPoint())
6245     return;
6246 
6247   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6248     OMPBuilder.createTaskwait(CGF.Builder);
6249   } else {
6250     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6251     // global_tid);
6252     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6253     // Ignore return result until untied tasks are supported.
6254     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6255                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6256                         Args);
6257   }
6258 
6259   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6260     Region->emitUntiedSwitch(CGF);
6261 }
6262 
6263 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6264                                            OpenMPDirectiveKind InnerKind,
6265                                            const RegionCodeGenTy &CodeGen,
6266                                            bool HasCancel) {
6267   if (!CGF.HaveInsertPoint())
6268     return;
6269   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6270                                  InnerKind != OMPD_critical &&
6271                                      InnerKind != OMPD_master &&
6272                                      InnerKind != OMPD_masked);
6273   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6274 }
6275 
namespace {
/// Cancellation-kind constants passed as the 'cncl_kind' argument of
/// __kmpc_cancel / __kmpc_cancellationpoint (see getCancellationKind()).
enum RTCancelKind {
  CancelNoreq = 0,     // No cancellation requested.
  CancelParallel = 1,  // Cancel a 'parallel' region.
  CancelLoop = 2,      // Cancel a worksharing loop ('for') region.
  CancelSections = 3,  // Cancel a 'sections' region.
  CancelTaskgroup = 4  // Cancel a 'taskgroup' region.
};
} // anonymous namespace
6285 
6286 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6287   RTCancelKind CancelKind = CancelNoreq;
6288   if (CancelRegion == OMPD_parallel)
6289     CancelKind = CancelParallel;
6290   else if (CancelRegion == OMPD_for)
6291     CancelKind = CancelLoop;
6292   else if (CancelRegion == OMPD_sections)
6293     CancelKind = CancelSections;
6294   else {
6295     assert(CancelRegion == OMPD_taskgroup);
6296     CancelKind = CancelTaskgroup;
6297   }
6298   return CancelKind;
6299 }
6300 
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // A non-zero result means cancellation was activated.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6340 
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The cancel call itself is emitted by this lambda so it can be guarded by
    // the 'if' clause condition when one is present.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // A non-zero result means cancellation was activated.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Guard the cancel with the 'if' clause condition; the else branch is a
      // no-op.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6386 
6387 namespace {
6388 /// Cleanup action for uses_allocators support.
6389 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6390   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6391 
6392 public:
6393   OMPUsesAllocatorsActionTy(
6394       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6395       : Allocators(Allocators) {}
6396   void Enter(CodeGenFunction &CGF) override {
6397     if (!CGF.HaveInsertPoint())
6398       return;
6399     for (const auto &AllocatorData : Allocators) {
6400       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6401           CGF, AllocatorData.first, AllocatorData.second);
6402     }
6403   }
6404   void Exit(CodeGenFunction &CGF) override {
6405     if (!CGF.HaveInsertPoint())
6406       return;
6407     for (const auto &AllocatorData : Allocators) {
6408       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6409                                                         AllocatorData.first);
6410     }
6411   }
6412 };
6413 } // namespace
6414 
6415 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6416     const OMPExecutableDirective &D, StringRef ParentName,
6417     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6418     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6419   assert(!ParentName.empty() && "Invalid target region parent name!");
6420   HasEmittedTargetRegion = true;
6421   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6422   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6423     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6424       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6425       if (!D.AllocatorTraits)
6426         continue;
6427       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6428     }
6429   }
6430   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6431   CodeGen.setAction(UsesAllocatorAction);
6432   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6433                                    IsOffloadEntry, CodeGen);
6434 }
6435 
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  // Build call to __kmpc_init_allocator(gtid, memspace, ntraits, traits) and
  // store the returned handle into the allocator variable from the
  // uses_allocators clause.
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits is the constant array bound of the traits expression.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as void** so the array can be
  // passed to the runtime as a single opaque pointer.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // Emit the local allocator variable itself, then store the runtime handle
  // into it, converting from void* to the allocator's declared type.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6470 
6471 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6472                                              const Expr *Allocator) {
6473   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6474   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6475   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6476   llvm::Value *AllocatorVal =
6477       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6478   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6479                                           CGF.getContext().VoidPtrTy,
6480                                           Allocator->getExprLoc());
6481   (void)CGF.EmitRuntimeCall(
6482       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6483                                             OMPRTL___kmpc_destroy_allocator),
6484       {ThreadId, AllocatorVal});
6485 }
6486 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured target region into a function with the name built
  // above.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host side: emit a unique constant byte whose address serves as the ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6555 
6556 /// Checks if the expression is constant or does not have non-trivial function
6557 /// calls.
6558 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6559   // We can skip constant expressions.
6560   // We can skip expressions with trivial calls or simple expressions.
6561   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6562           !E->hasNonTrivialCall(Ctx)) &&
6563          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6564 }
6565 
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  // Walk through (possibly nested) compound statements looking for exactly
  // one "meaningful" child statement; return nullptr if there are several.
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Trivial expressions (constant-evaluatable or without non-trivial
      // calls, and side-effect free) do not count as children.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              // Declarations that do not generate code (types, pragmas,
              // OpenMP metadata, using declarations, ...) are ignorable.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Global or unused variables do not make the statement
              // meaningful either.
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Descend into the single child to keep unwrapping nested containers.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6607 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// A returned constant of 0 is emitted when no num_teams clause is present
/// (presumably letting the runtime pick a default - confirm at the call
/// sites); nullptr means the team count cannot be determined here at all.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target' region: look through the captured statement for a
    // single nested directive and derive the team count from it.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Set up the captured-statement context so the num_teams expression
          // can reference variables captured by the 'target' region.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams construct without a num_teams clause.
        return Bld.getInt32(0);
      }
      // A nested parallel or simd region executes with a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // The single child is not an OpenMP directive - nothing to inspect.
    return nullptr;
  }
  // Combined target+teams forms: the num_teams clause, if any, is attached to
  // the directive itself.
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  // Target directives with no associated teams construct run with one team.
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // All remaining directive kinds are not target-based executable directives;
  // reaching this function with one of them is a bug (see the assertion above
  // and the llvm_unreachable below).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6742 
/// Compute the number of threads implied by the single directive nested
/// directly inside the captured statement \p CS, honoring 'if' and
/// 'num_threads' clauses on a nested parallel directive.
///
/// \param DefaultThreadLimitVal Value of an enclosing thread_limit clause, or
///        null if there is none. It serves both as a fallback result and as
///        an upper bound applied to any num_threads value found.
/// \return The computed thread count; a nested simd region yields 1. If the
///         child is some other directive, \p DefaultThreadLimitVal (possibly
///         null) is returned; if the child is not a directive at all,
///         \p DefaultThreadLimitVal or the constant 0 is returned.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        // Evaluate the condition in the captured-statement context so it can
        // reference variables captured by the enclosing region.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Pick the if clause that applies to 'parallel' (either unmodified or
        // with the 'parallel' directive-name modifier).
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Condition folds to a constant: a false condition means the
            // region runs with a single thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit helper variables Sema generated for the condition before
            // evaluating it.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause only when the if clause was not
      // specified or did not constant-fold to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit helper variables Sema generated for the num_threads expression.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads to the enclosing thread_limit, if any:
        // min(DefaultThreadLimitVal, NumThreads) via unsigned compare.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A simd region executes with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6834 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// When both thread_limit and num_threads apply, the smaller of the two is
/// used. A returned constant of 0 is emitted when no applicable clause is
/// found (presumably letting the runtime pick a default - confirm at the
/// call sites).
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // If a parallel region is nested directly inside the target region, its
    // clauses fully determine the thread count.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        // Evaluate thread_limit in the captured-statement context so it can
        // reference variables captured by the 'target' region.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit helper variables Sema generated for the thread_limit
        // expression.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a teams directive that is not combined with distribute, descend
      // into its body to find the directive that actually spawns threads.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A (non-simd) distribute region may itself wrap the parallel region.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A simd region executes with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // thread_limit, if present, is attached to the combined directive itself.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    // A plain 'distribute' nested in the teams region may wrap the parallel
    // region; look inside it as well.
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  // Combined target+parallel forms: if, thread_limit and num_threads clauses
  // are all attached to the directive itself.
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Pick the if clause that applies to 'parallel' (either unmodified or
      // with the 'parallel' directive-name modifier).
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant false condition: run with a single thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // num_threads is additionally bounded by thread_limit: take the
      // unsigned minimum of the two when both are present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // Fold in the runtime-evaluated if-clause condition.
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  // Simd-only target forms execute with a single thread.
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // All remaining directive kinds are not target-based executable directives;
  // reaching this function with one of them is a bug (see the assertion above
  // and the llvm_unreachable below).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7061 
7062 namespace {
7063 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7064 
7065 // Utility to handle information from clauses associated with a given
7066 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7067 // It provides a convenient interface to obtain the information and generate
7068 // code for that information.
7069 class MappableExprsHandler {
7070 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  ///
  /// NOTE(review): these values appear to mirror the map-type flags expected
  /// by the offloading runtime library - confirm and keep the two definitions
  /// in sync when changing them.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7118 
7119   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7120   static unsigned getFlagMemberOffset() {
7121     unsigned Offset = 0;
7122     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7123          Remain = Remain >> 1)
7124       Offset++;
7125     return Offset;
7126   }
7127 
7128   /// Class that holds debugging information for a data mapping to be passed to
7129   /// the runtime library.
7130   class MappingExprInfo {
7131     /// The variable declaration used for the data mapping.
7132     const ValueDecl *MapDecl = nullptr;
7133     /// The original expression used in the map clause, or null if there is
7134     /// none.
7135     const Expr *MapExpr = nullptr;
7136 
7137   public:
7138     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7139         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7140 
7141     const ValueDecl *getMapDecl() const { return MapDecl; }
7142     const Expr *getMapExpr() const { return MapExpr; }
7143   };
7144 
7145   /// Class that associates information with a base pointer to be passed to the
7146   /// runtime library.
7147   class BasePointerInfo {
7148     /// The base pointer.
7149     llvm::Value *Ptr = nullptr;
7150     /// The base declaration that refers to this device pointer, or null if
7151     /// there is none.
7152     const ValueDecl *DevPtrDecl = nullptr;
7153 
7154   public:
7155     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7156         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7157     llvm::Value *operator*() const { return Ptr; }
7158     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7159     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7160   };
7161 
  // Aliases for the parallel arrays used to communicate mapping information
  // (map expressions, base pointers, section pointers/sizes, map-type flags,
  // user-defined mappers, and non-contiguous dimension data) to the runtime.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7169 
7170   /// This structure contains combined information generated for mappable
7171   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7172   /// mappers, and non-contiguous information.
7173   struct MapCombinedInfoTy {
7174     struct StructNonContiguousInfo {
7175       bool IsNonContiguous = false;
7176       MapDimArrayTy Dims;
7177       MapNonContiguousArrayTy Offsets;
7178       MapNonContiguousArrayTy Counts;
7179       MapNonContiguousArrayTy Strides;
7180     };
7181     MapExprsArrayTy Exprs;
7182     MapBaseValuesArrayTy BasePointers;
7183     MapValuesArrayTy Pointers;
7184     MapValuesArrayTy Sizes;
7185     MapFlagsArrayTy Types;
7186     MapMappersArrayTy Mappers;
7187     StructNonContiguousInfo NonContigInfo;
7188 
7189     /// Append arrays in \a CurInfo.
7190     void append(MapCombinedInfoTy &CurInfo) {
7191       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7192       BasePointers.append(CurInfo.BasePointers.begin(),
7193                           CurInfo.BasePointers.end());
7194       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7195       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7196       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7197       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7198       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7199                                  CurInfo.NonContigInfo.Dims.end());
7200       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7201                                     CurInfo.NonContigInfo.Offsets.end());
7202       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7203                                    CurInfo.NonContigInfo.Counts.end());
7204       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7205                                     CurInfo.NonContigInfo.Strides.end());
7206     }
7207   };
7208 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Mapping information collected for the struct's members before the
    /// combined struct entry itself is emitted.
    MapCombinedInfoTy PreliminaryMapData;
    /// Field index and address of the lowest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Address of the struct itself.
    Address Base = Address::invalid();
    /// Lower bound address of the mapped range (NOTE(review): inferred from
    /// the name - confirm at the use site).
    Address LB = Address::invalid();
    /// True if one of the mapped elements is an array section.
    bool IsArraySection = false;
    /// True if the complete record has been mapped.
    bool HasCompleteRecord = false;
  };
7224 
7225 private:
  /// Information gathered for a single mappable-expression component list:
  /// the component list itself plus the clause attributes (map type,
  /// modifiers, implicitness, mapper, ...) that determine how it is mapped.
  /// (The previous comment here, about how a device pointer has to be
  /// returned, described a different entity.)
  struct MapInfo {
    /// Component list of the mappable expression this entry describes.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Kind of the originating map clause (to/from/tofrom/alloc/...).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers (e.g. always, close, present) of the clause.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Motion modifiers of a 'to'/'from' clause.
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// Whether the runtime has to return the device pointer for this entry.
    bool ReturnDevicePointer = false;
    /// Whether this mapping was generated implicitly rather than written by
    /// the user.
    bool IsImplicit = false;
    /// User-defined mapper associated with the entry, if any.
    const ValueDecl *Mapper = nullptr;
    /// Original variable-reference expression, if any.
    const Expr *VarRef = nullptr;
    /// Whether the entry originates from use_device_addr-style handling.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7252 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression identifying the struct member.
    const Expr *IE = nullptr;
    /// Declaration named by the use_device_ptr/use_device_addr clause.
    const ValueDecl *VD = nullptr;
    /// True for use_device_addr, false for use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7265 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either a executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null. Each declaration may
  /// have several component lists associated with it.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7286 
  /// Compute the size in bytes of the data designated by expression \p E as
  /// an llvm::Value. Array shaping expressions and array sections are handled
  /// specially (their size depends on the runtime bounds); any other
  /// expression uses the static size of its type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Size = element size * product of all dimension extents.
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: Size = length * element size.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Guard against lb beyond the base: clamp the result to zero via a
      // select on an unsigned compare.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7361 
7362   /// Return the corresponding bits for a given map clause modifier. Add
7363   /// a flag marking the map as a pointer if requested. Add a flag marking the
7364   /// map as the first one of a series of maps that relate to the same map
7365   /// expression.
7366   OpenMPOffloadMappingFlags getMapTypeBits(
7367       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7368       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7369       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7370     OpenMPOffloadMappingFlags Bits =
7371         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7372     switch (MapType) {
7373     case OMPC_MAP_alloc:
7374     case OMPC_MAP_release:
7375       // alloc and release is the default behavior in the runtime library,  i.e.
7376       // if we don't pass any bits alloc/release that is what the runtime is
7377       // going to do. Therefore, we don't need to signal anything for these two
7378       // type modifiers.
7379       break;
7380     case OMPC_MAP_to:
7381       Bits |= OMP_MAP_TO;
7382       break;
7383     case OMPC_MAP_from:
7384       Bits |= OMP_MAP_FROM;
7385       break;
7386     case OMPC_MAP_tofrom:
7387       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7388       break;
7389     case OMPC_MAP_delete:
7390       Bits |= OMP_MAP_DELETE;
7391       break;
7392     case OMPC_MAP_unknown:
7393       llvm_unreachable("Unexpected map type!");
7394     }
7395     if (AddPtrFlag)
7396       Bits |= OMP_MAP_PTR_AND_OBJ;
7397     if (AddIsTargetParamFlag)
7398       Bits |= OMP_MAP_TARGET_PARAM;
7399     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7400         != MapModifiers.end())
7401       Bits |= OMP_MAP_ALWAYS;
7402     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7403         != MapModifiers.end())
7404       Bits |= OMP_MAP_CLOSE;
7405     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) !=
7406             MapModifiers.end() ||
7407         llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) !=
7408             MotionModifiers.end())
7409       Bits |= OMP_MAP_PRESENT;
7410     if (IsNonContiguous)
7411       Bits |= OMP_MAP_NON_CONTIG;
7412     return Bits;
7413   }
7414 
7415   /// Return true if the provided expression is a final array section. A
7416   /// final array section, is one whose length can't be proved to be one.
7417   bool isFinalArraySectionExpression(const Expr *E) const {
7418     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7419 
7420     // It is not an array section and therefore not a unity-size one.
7421     if (!OASE)
7422       return false;
7423 
7424     // An array section with no colon always refer to a single element.
7425     if (OASE->getColonLocFirst().isInvalid())
7426       return false;
7427 
7428     const Expr *Length = OASE->getLength();
7429 
7430     // If we don't have a length we have to check if the array has size 1
7431     // for this dimension. Also, we should always expect a length if the
7432     // base type is pointer.
7433     if (!Length) {
7434       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7435                              OASE->getBase()->IgnoreParenImpCasts())
7436                              .getCanonicalType();
7437       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7438         return ATy->getSize().getSExtValue() != 1;
7439       // If we don't have a constant dimension length, we have to consider
7440       // the current section as having any size, so it is not necessarily
7441       // unitary. If it happen to be unity size, that's user fault.
7442       return true;
7443     }
7444 
7445     // Check if the length evaluates to 1.
7446     Expr::EvalResult Result;
7447     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7448       return true; // Can have more that size 1.
7449 
7450     llvm::APSInt ConstLength = Result.Val.getInt();
7451     return ConstLength.getSExtValue() != 1;
7452   }
7453 
7454   /// Generate the base pointers, section pointers, sizes, map type bits, and
7455   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7456   /// map type, map or motion modifiers, and expression components.
7457   /// \a IsFirstComponent should be set to true if the provided set of
7458   /// components is the first associated with a capture.
7459   void generateInfoForComponentList(
7460       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7461       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7462       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7463       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7464       bool IsFirstComponentList, bool IsImplicit,
7465       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7466       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7467       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7468           OverlappedElements = llvm::None) const {
7469     // The following summarizes what has to be generated for each map and the
7470     // types below. The generated information is expressed in this order:
7471     // base pointer, section pointer, size, flags
7472     // (to add to the ones that come from the map type and modifier).
7473     //
7474     // double d;
7475     // int i[100];
7476     // float *p;
7477     //
7478     // struct S1 {
7479     //   int i;
7480     //   float f[50];
7481     // }
7482     // struct S2 {
7483     //   int i;
7484     //   float f[50];
7485     //   S1 s;
7486     //   double *p;
7487     //   struct S2 *ps;
7488     //   int &ref;
7489     // }
7490     // S2 s;
7491     // S2 *ps;
7492     //
7493     // map(d)
7494     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7495     //
7496     // map(i)
7497     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7498     //
7499     // map(i[1:23])
7500     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7501     //
7502     // map(p)
7503     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7504     //
7505     // map(p[1:24])
7506     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7507     // in unified shared memory mode or for local pointers
7508     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7509     //
7510     // map(s)
7511     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7512     //
7513     // map(s.i)
7514     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7515     //
7516     // map(s.s.f)
7517     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7518     //
7519     // map(s.p)
7520     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7521     //
7522     // map(to: s.p[:22])
7523     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7524     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7525     // &(s.p), &(s.p[0]), 22*sizeof(double),
7526     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7527     // (*) alloc space for struct members, only this is a target parameter
7528     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7529     //      optimizes this entry out, same in the examples below)
7530     // (***) map the pointee (map: to)
7531     //
7532     // map(to: s.ref)
7533     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7534     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7535     // (*) alloc space for struct members, only this is a target parameter
7536     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7537     //      optimizes this entry out, same in the examples below)
7538     // (***) map the pointee (map: to)
7539     //
7540     // map(s.ps)
7541     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7542     //
7543     // map(from: s.ps->s.i)
7544     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7545     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7546     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7547     //
7548     // map(to: s.ps->ps)
7549     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7550     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7551     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7552     //
7553     // map(s.ps->ps->ps)
7554     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7555     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7556     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7557     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7558     //
7559     // map(to: s.ps->ps->s.f[:22])
7560     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7561     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7562     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7563     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7564     //
7565     // map(ps)
7566     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7567     //
7568     // map(ps->i)
7569     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7570     //
7571     // map(ps->s.f)
7572     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7573     //
7574     // map(from: ps->p)
7575     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7576     //
7577     // map(to: ps->p[:22])
7578     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7579     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7580     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7581     //
7582     // map(ps->ps)
7583     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7584     //
7585     // map(from: ps->ps->s.i)
7586     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7587     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7588     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7589     //
7590     // map(from: ps->ps->ps)
7591     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7592     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7593     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7594     //
7595     // map(ps->ps->ps->ps)
7596     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7597     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7598     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7599     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7600     //
7601     // map(to: ps->ps->ps->s.f[:22])
7602     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7603     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7604     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7605     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7606     //
7607     // map(to: s.f[:22]) map(from: s.p[:33])
7608     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7609     //     sizeof(double*) (**), TARGET_PARAM
7610     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7611     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7612     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7613     // (*) allocate contiguous space needed to fit all mapped members even if
7614     //     we allocate space for members not mapped (in this example,
7615     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7616     //     them as well because they fall between &s.f[0] and &s.p)
7617     //
7618     // map(from: s.f[:22]) map(to: ps->p[:33])
7619     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7620     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7621     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7622     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7623     // (*) the struct this entry pertains to is the 2nd element in the list of
7624     //     arguments, hence MEMBER_OF(2)
7625     //
7626     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7627     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7628     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7629     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7630     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7631     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7632     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7633     // (*) the struct this entry pertains to is the 4th element in the list
7634     //     of arguments, hence MEMBER_OF(4)
7635 
7636     // Track if the map information being generated is the first for a capture.
7637     bool IsCaptureFirstInfo = IsFirstComponentList;
7638     // When the variable is on a declare target link or in a to clause with
7639     // unified memory, a reference is needed to hold the host/device address
7640     // of the variable.
7641     bool RequiresReference = false;
7642 
7643     // Scan the components from the base to the complete expression.
7644     auto CI = Components.rbegin();
7645     auto CE = Components.rend();
7646     auto I = CI;
7647 
7648     // Track if the map information being generated is the first for a list of
7649     // components.
7650     bool IsExpressionFirstInfo = true;
7651     bool FirstPointerInComplexData = false;
7652     Address BP = Address::invalid();
7653     const Expr *AssocExpr = I->getAssociatedExpression();
7654     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7655     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7656     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7657 
7658     if (isa<MemberExpr>(AssocExpr)) {
7659       // The base is the 'this' pointer. The content of the pointer is going
7660       // to be the base of the field being mapped.
7661       BP = CGF.LoadCXXThisAddress();
7662     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7663                (OASE &&
7664                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7665       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7666     } else if (OAShE &&
7667                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7668       BP = Address(
7669           CGF.EmitScalarExpr(OAShE->getBase()),
7670           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7671     } else {
7672       // The base is the reference to the variable.
7673       // BP = &Var.
7674       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7675       if (const auto *VD =
7676               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7677         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7678                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7679           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7680               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7681                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7682             RequiresReference = true;
7683             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7684           }
7685         }
7686       }
7687 
7688       // If the variable is a pointer and is being dereferenced (i.e. is not
7689       // the last component), the base has to be the pointer itself, not its
7690       // reference. References are ignored for mapping purposes.
7691       QualType Ty =
7692           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7693       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7694         // No need to generate individual map information for the pointer, it
7695         // can be associated with the combined storage if shared memory mode is
7696         // active or the base declaration is not global variable.
7697         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7698         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7699             !VD || VD->hasLocalStorage())
7700           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7701         else
7702           FirstPointerInComplexData = true;
7703         ++I;
7704       }
7705     }
7706 
7707     // Track whether a component of the list should be marked as MEMBER_OF some
7708     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7709     // in a component list should be marked as MEMBER_OF, all subsequent entries
7710     // do not belong to the base struct. E.g.
7711     // struct S2 s;
7712     // s.ps->ps->ps->f[:]
7713     //   (1) (2) (3) (4)
7714     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7715     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7716     // is the pointee of ps(2) which is not member of struct s, so it should not
7717     // be marked as such (it is still PTR_AND_OBJ).
7718     // The variable is initialized to false so that PTR_AND_OBJ entries which
7719     // are not struct members are not considered (e.g. array of pointers to
7720     // data).
7721     bool ShouldBeMemberOf = false;
7722 
7723     // Variable keeping track of whether or not we have encountered a component
7724     // in the component list which is a member expression. Useful when we have a
7725     // pointer or a final array section, in which case it is the previous
7726     // component in the list which tells us whether we have a member expression.
7727     // E.g. X.f[:]
7728     // While processing the final array section "[:]" it is "f" which tells us
7729     // whether we are dealing with a member of a declared struct.
7730     const MemberExpr *EncounteredME = nullptr;
7731 
7732     // Track for the total number of dimension. Start from one for the dummy
7733     // dimension.
7734     uint64_t DimSize = 1;
7735 
7736     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7737     bool IsPrevMemberReference = false;
7738 
7739     for (; I != CE; ++I) {
7740       // If the current component is member of a struct (parent struct) mark it.
7741       if (!EncounteredME) {
7742         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7743         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7744         // as MEMBER_OF the parent struct.
7745         if (EncounteredME) {
7746           ShouldBeMemberOf = true;
7747           // Do not emit as complex pointer if this is actually not array-like
7748           // expression.
7749           if (FirstPointerInComplexData) {
7750             QualType Ty = std::prev(I)
7751                               ->getAssociatedDeclaration()
7752                               ->getType()
7753                               .getNonReferenceType();
7754             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7755             FirstPointerInComplexData = false;
7756           }
7757         }
7758       }
7759 
7760       auto Next = std::next(I);
7761 
7762       // We need to generate the addresses and sizes if this is the last
7763       // component, if the component is a pointer or if it is an array section
7764       // whose length can't be proved to be one. If this is a pointer, it
7765       // becomes the base address for the following components.
7766 
7767       // A final array section, is one whose length can't be proved to be one.
7768       // If the map item is non-contiguous then we don't treat any array section
7769       // as final array section.
7770       bool IsFinalArraySection =
7771           !IsNonContiguous &&
7772           isFinalArraySectionExpression(I->getAssociatedExpression());
7773 
7774       // If we have a declaration for the mapping use that, otherwise use
7775       // the base declaration of the map clause.
7776       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7777                                      ? I->getAssociatedDeclaration()
7778                                      : BaseDecl;
7779       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7780                                                : MapExpr;
7781 
7782       // Get information on whether the element is a pointer. Have to do a
7783       // special treatment for array sections given that they are built-in
7784       // types.
7785       const auto *OASE =
7786           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7787       const auto *OAShE =
7788           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7789       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7790       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7791       bool IsPointer =
7792           OAShE ||
7793           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7794                        .getCanonicalType()
7795                        ->isAnyPointerType()) ||
7796           I->getAssociatedExpression()->getType()->isAnyPointerType();
7797       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7798                                MapDecl &&
7799                                MapDecl->getType()->isLValueReferenceType();
7800       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7801 
7802       if (OASE)
7803         ++DimSize;
7804 
7805       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7806           IsFinalArraySection) {
7807         // If this is not the last component, we expect the pointer to be
7808         // associated with an array expression or member expression.
7809         assert((Next == CE ||
7810                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7811                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7812                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7813                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7814                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7815                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7816                "Unexpected expression");
7817 
7818         Address LB = Address::invalid();
7819         Address LowestElem = Address::invalid();
7820         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7821                                        const MemberExpr *E) {
7822           const Expr *BaseExpr = E->getBase();
7823           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7824           // scalar.
7825           LValue BaseLV;
7826           if (E->isArrow()) {
7827             LValueBaseInfo BaseInfo;
7828             TBAAAccessInfo TBAAInfo;
7829             Address Addr =
7830                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7831             QualType PtrTy = BaseExpr->getType()->getPointeeType();
7832             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7833           } else {
7834             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7835           }
7836           return BaseLV;
7837         };
7838         if (OAShE) {
7839           LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7840                                     CGF.getContext().getTypeAlignInChars(
7841                                         OAShE->getBase()->getType()));
7842         } else if (IsMemberReference) {
7843           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7844           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7845           LowestElem = CGF.EmitLValueForFieldInitialization(
7846                               BaseLVal, cast<FieldDecl>(MapDecl))
7847                            .getAddress(CGF);
7848           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7849                    .getAddress(CGF);
7850         } else {
7851           LowestElem = LB =
7852               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7853                   .getAddress(CGF);
7854         }
7855 
7856         // If this component is a pointer inside the base struct then we don't
7857         // need to create any entry for it - it will be combined with the object
7858         // it is pointing to into a single PTR_AND_OBJ entry.
7859         bool IsMemberPointerOrAddr =
7860             EncounteredME &&
7861             (((IsPointer || ForDeviceAddr) &&
7862               I->getAssociatedExpression() == EncounteredME) ||
7863              (IsPrevMemberReference && !IsPointer) ||
7864              (IsMemberReference && Next != CE &&
7865               !Next->getAssociatedExpression()->getType()->isPointerType()));
7866         if (!OverlappedElements.empty() && Next == CE) {
7867           // Handle base element with the info for overlapped elements.
7868           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7869           assert(!IsPointer &&
7870                  "Unexpected base element with the pointer type.");
7871           // Mark the whole struct as the struct that requires allocation on the
7872           // device.
7873           PartialStruct.LowestElem = {0, LowestElem};
7874           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7875               I->getAssociatedExpression()->getType());
7876           Address HB = CGF.Builder.CreateConstGEP(
7877               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
7878                                                               CGF.VoidPtrTy),
7879               TypeSize.getQuantity() - 1);
7880           PartialStruct.HighestElem = {
7881               std::numeric_limits<decltype(
7882                   PartialStruct.HighestElem.first)>::max(),
7883               HB};
7884           PartialStruct.Base = BP;
7885           PartialStruct.LB = LB;
7886           assert(
7887               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7888               "Overlapped elements must be used only once for the variable.");
7889           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7890           // Emit data for non-overlapped data.
7891           OpenMPOffloadMappingFlags Flags =
7892               OMP_MAP_MEMBER_OF |
7893               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7894                              /*AddPtrFlag=*/false,
7895                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7896           llvm::Value *Size = nullptr;
7897           // Do bitcopy of all non-overlapped structure elements.
7898           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7899                    Component : OverlappedElements) {
7900             Address ComponentLB = Address::invalid();
7901             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7902                  Component) {
7903               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7904                 const auto *FD = dyn_cast<FieldDecl>(VD);
7905                 if (FD && FD->getType()->isLValueReferenceType()) {
7906                   const auto *ME =
7907                       cast<MemberExpr>(MC.getAssociatedExpression());
7908                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7909                   ComponentLB =
7910                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7911                           .getAddress(CGF);
7912                 } else {
7913                   ComponentLB =
7914                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7915                           .getAddress(CGF);
7916                 }
7917                 Size = CGF.Builder.CreatePtrDiff(
7918                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7919                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7920                 break;
7921               }
7922             }
7923             assert(Size && "Failed to determine structure size");
7924             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7925             CombinedInfo.BasePointers.push_back(BP.getPointer());
7926             CombinedInfo.Pointers.push_back(LB.getPointer());
7927             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7928                 Size, CGF.Int64Ty, /*isSigned=*/true));
7929             CombinedInfo.Types.push_back(Flags);
7930             CombinedInfo.Mappers.push_back(nullptr);
7931             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7932                                                                       : 1);
7933             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7934           }
7935           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7936           CombinedInfo.BasePointers.push_back(BP.getPointer());
7937           CombinedInfo.Pointers.push_back(LB.getPointer());
7938           Size = CGF.Builder.CreatePtrDiff(
7939               CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
7940               CGF.EmitCastToVoidPtr(LB.getPointer()));
7941           CombinedInfo.Sizes.push_back(
7942               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7943           CombinedInfo.Types.push_back(Flags);
7944           CombinedInfo.Mappers.push_back(nullptr);
7945           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7946                                                                     : 1);
7947           break;
7948         }
7949         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7950         if (!IsMemberPointerOrAddr ||
7951             (Next == CE && MapType != OMPC_MAP_unknown)) {
7952           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7953           CombinedInfo.BasePointers.push_back(BP.getPointer());
7954           CombinedInfo.Pointers.push_back(LB.getPointer());
7955           CombinedInfo.Sizes.push_back(
7956               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7957           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7958                                                                     : 1);
7959 
7960           // If Mapper is valid, the last component inherits the mapper.
7961           bool HasMapper = Mapper && Next == CE;
7962           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7963 
7964           // We need to add a pointer flag for each map that comes from the
7965           // same expression except for the first one. We also need to signal
7966           // this map is the first one that relates with the current capture
7967           // (there is a set of entries for each capture).
7968           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7969               MapType, MapModifiers, MotionModifiers, IsImplicit,
7970               !IsExpressionFirstInfo || RequiresReference ||
7971                   FirstPointerInComplexData || IsMemberReference,
7972               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7973 
7974           if (!IsExpressionFirstInfo || IsMemberReference) {
7975             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7976             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7977             if (IsPointer || (IsMemberReference && Next != CE))
7978               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7979                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7980 
7981             if (ShouldBeMemberOf) {
7982               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7983               // should be later updated with the correct value of MEMBER_OF.
7984               Flags |= OMP_MAP_MEMBER_OF;
7985               // From now on, all subsequent PTR_AND_OBJ entries should not be
7986               // marked as MEMBER_OF.
7987               ShouldBeMemberOf = false;
7988             }
7989           }
7990 
7991           CombinedInfo.Types.push_back(Flags);
7992         }
7993 
7994         // If we have encountered a member expression so far, keep track of the
7995         // mapped member. If the parent is "*this", then the value declaration
7996         // is nullptr.
7997         if (EncounteredME) {
7998           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7999           unsigned FieldIndex = FD->getFieldIndex();
8000 
8001           // Update info about the lowest and highest elements for this struct
8002           if (!PartialStruct.Base.isValid()) {
8003             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8004             if (IsFinalArraySection) {
8005               Address HB =
8006                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8007                       .getAddress(CGF);
8008               PartialStruct.HighestElem = {FieldIndex, HB};
8009             } else {
8010               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8011             }
8012             PartialStruct.Base = BP;
8013             PartialStruct.LB = BP;
8014           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8015             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8016           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8017             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8018           }
8019         }
8020 
8021         // Need to emit combined struct for array sections.
8022         if (IsFinalArraySection || IsNonContiguous)
8023           PartialStruct.IsArraySection = true;
8024 
8025         // If we have a final array section, we are done with this expression.
8026         if (IsFinalArraySection)
8027           break;
8028 
8029         // The pointer becomes the base for the next element.
8030         if (Next != CE)
8031           BP = IsMemberReference ? LowestElem : LB;
8032 
8033         IsExpressionFirstInfo = false;
8034         IsCaptureFirstInfo = false;
8035         FirstPointerInComplexData = false;
8036         IsPrevMemberReference = IsMemberReference;
8037       } else if (FirstPointerInComplexData) {
8038         QualType Ty = Components.rbegin()
8039                           ->getAssociatedDeclaration()
8040                           ->getType()
8041                           .getNonReferenceType();
8042         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8043         FirstPointerInComplexData = false;
8044       }
8045     }
8046     // If ran into the whole component - allocate the space for the whole
8047     // record.
8048     if (!EncounteredME)
8049       PartialStruct.HasCompleteRecord = true;
8050 
8051     if (!IsNonContiguous)
8052       return;
8053 
8054     const ASTContext &Context = CGF.getContext();
8055 
8056     // For supporting stride in array section, we need to initialize the first
8057     // dimension size as 1, first offset as 0, and first count as 1
8058     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8059     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8060     MapValuesArrayTy CurStrides;
8061     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8062     uint64_t ElementTypeSize;
8063 
8064     // Collect Size information for each dimension and get the element size as
8065     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
8067     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8068          Components) {
8069       const Expr *AssocExpr = Component.getAssociatedExpression();
8070       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8071 
8072       if (!OASE)
8073         continue;
8074 
8075       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8076       auto *CAT = Context.getAsConstantArrayType(Ty);
8077       auto *VAT = Context.getAsVariableArrayType(Ty);
8078 
8079       // We need all the dimension size except for the last dimension.
8080       assert((VAT || CAT || &Component == &*Components.begin()) &&
8081              "Should be either ConstantArray or VariableArray if not the "
8082              "first Component");
8083 
8084       // Get element size if CurStrides is empty.
8085       if (CurStrides.empty()) {
8086         const Type *ElementType = nullptr;
8087         if (CAT)
8088           ElementType = CAT->getElementType().getTypePtr();
8089         else if (VAT)
8090           ElementType = VAT->getElementType().getTypePtr();
8091         else
8092           assert(&Component == &*Components.begin() &&
8093                  "Only expect pointer (non CAT or VAT) when this is the "
8094                  "first Component");
8095         // If ElementType is null, then it means the base is a pointer
8096         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8097         // for next iteration.
8098         if (ElementType) {
8099           // For the case that having pointer as base, we need to remove one
8100           // level of indirection.
8101           if (&Component != &*Components.begin())
8102             ElementType = ElementType->getPointeeOrArrayElementType();
8103           ElementTypeSize =
8104               Context.getTypeSizeInChars(ElementType).getQuantity();
8105           CurStrides.push_back(
8106               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8107         }
8108       }
8109       // Get dimension value except for the last dimension since we don't need
8110       // it.
8111       if (DimSizes.size() < Components.size() - 1) {
8112         if (CAT)
8113           DimSizes.push_back(llvm::ConstantInt::get(
8114               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8115         else if (VAT)
8116           DimSizes.push_back(CGF.Builder.CreateIntCast(
8117               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8118               /*IsSigned=*/false));
8119       }
8120     }
8121 
    // Skip the dummy dimension since we already have its information.
8123     auto DI = DimSizes.begin() + 1;
8124     // Product of dimension.
8125     llvm::Value *DimProd =
8126         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8127 
8128     // Collect info for non-contiguous. Notice that offset, count, and stride
8129     // are only meaningful for array-section, so we insert a null for anything
8130     // other than array-section.
8131     // Also, the size of offset, count, and stride are not the same as
8132     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8133     // count, and stride are the same as the number of non-contiguous
8134     // declaration in target update to/from clause.
8135     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8136          Components) {
8137       const Expr *AssocExpr = Component.getAssociatedExpression();
8138 
8139       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8140         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8141             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8142             /*isSigned=*/false);
8143         CurOffsets.push_back(Offset);
8144         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8145         CurStrides.push_back(CurStrides.back());
8146         continue;
8147       }
8148 
8149       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8150 
8151       if (!OASE)
8152         continue;
8153 
8154       // Offset
8155       const Expr *OffsetExpr = OASE->getLowerBound();
8156       llvm::Value *Offset = nullptr;
8157       if (!OffsetExpr) {
8158         // If offset is absent, then we just set it to zero.
8159         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8160       } else {
8161         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8162                                            CGF.Int64Ty,
8163                                            /*isSigned=*/false);
8164       }
8165       CurOffsets.push_back(Offset);
8166 
8167       // Count
8168       const Expr *CountExpr = OASE->getLength();
8169       llvm::Value *Count = nullptr;
8170       if (!CountExpr) {
8171         // In Clang, once a high dimension is an array section, we construct all
8172         // the lower dimension as array section, however, for case like
8173         // arr[0:2][2], Clang construct the inner dimension as an array section
8174         // but it actually is not in an array section form according to spec.
8175         if (!OASE->getColonLocFirst().isValid() &&
8176             !OASE->getColonLocSecond().isValid()) {
8177           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8178         } else {
8179           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8180           // When the length is absent it defaults to ⌈(size −
8181           // lower-bound)/stride⌉, where size is the size of the array
8182           // dimension.
8183           const Expr *StrideExpr = OASE->getStride();
8184           llvm::Value *Stride =
8185               StrideExpr
8186                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8187                                               CGF.Int64Ty, /*isSigned=*/false)
8188                   : nullptr;
8189           if (Stride)
8190             Count = CGF.Builder.CreateUDiv(
8191                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8192           else
8193             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8194         }
8195       } else {
8196         Count = CGF.EmitScalarExpr(CountExpr);
8197       }
8198       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8199       CurCounts.push_back(Count);
8200 
8201       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8202       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8203       //              Offset      Count     Stride
8204       //    D0          0           1         4    (int)    <- dummy dimension
8205       //    D1          0           2         8    (2 * (1) * 4)
8206       //    D2          1           2         20   (1 * (1 * 5) * 4)
      //    D3          0           2         200  (2 * (1 * 5 * 5) * 4)
8208       const Expr *StrideExpr = OASE->getStride();
8209       llvm::Value *Stride =
8210           StrideExpr
8211               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8212                                           CGF.Int64Ty, /*isSigned=*/false)
8213               : nullptr;
8214       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8215       if (Stride)
8216         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8217       else
8218         CurStrides.push_back(DimProd);
8219       if (DI != DimSizes.end())
8220         ++DI;
8221     }
8222 
8223     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8224     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8225     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8226   }
8227 
8228   /// Return the adjusted map modifiers if the declaration a capture refers to
8229   /// appears in a first-private clause. This is expected to be used only with
8230   /// directives that start with 'target'.
8231   MappableExprsHandler::OpenMPOffloadMappingFlags
8232   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8233     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8234 
8235     // A first private variable captured by reference will use only the
8236     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8237     // declaration is known as first-private in this handler.
8238     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8239       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8240         return MappableExprsHandler::OMP_MAP_TO |
8241                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8242       return MappableExprsHandler::OMP_MAP_PRIVATE |
8243              MappableExprsHandler::OMP_MAP_TO;
8244     }
8245     return MappableExprsHandler::OMP_MAP_TO |
8246            MappableExprsHandler::OMP_MAP_FROM;
8247   }
8248 
8249   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8250     // Rotate by getFlagMemberOffset() bits.
8251     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8252                                                   << getFlagMemberOffset());
8253   }
8254 
8255   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8256                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8257     // If the entry is PTR_AND_OBJ but has not been marked with the special
8258     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8259     // marked as MEMBER_OF.
8260     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8261         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8262       return;
8263 
8264     // Reset the placeholder value to prepare the flag for the assignment of the
8265     // proper MEMBER_OF value.
8266     Flags &= ~OMP_MAP_MEMBER_OF;
8267     Flags |= MemberOfFlag;
8268   }
8269 
8270   void getPlainLayout(const CXXRecordDecl *RD,
8271                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8272                       bool AsBase) const {
8273     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8274 
8275     llvm::StructType *St =
8276         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8277 
8278     unsigned NumElements = St->getNumElements();
8279     llvm::SmallVector<
8280         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8281         RecordLayout(NumElements);
8282 
8283     // Fill bases.
8284     for (const auto &I : RD->bases()) {
8285       if (I.isVirtual())
8286         continue;
8287       const auto *Base = I.getType()->getAsCXXRecordDecl();
8288       // Ignore empty bases.
8289       if (Base->isEmpty() || CGF.getContext()
8290                                  .getASTRecordLayout(Base)
8291                                  .getNonVirtualSize()
8292                                  .isZero())
8293         continue;
8294 
8295       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8296       RecordLayout[FieldIndex] = Base;
8297     }
8298     // Fill in virtual bases.
8299     for (const auto &I : RD->vbases()) {
8300       const auto *Base = I.getType()->getAsCXXRecordDecl();
8301       // Ignore empty bases.
8302       if (Base->isEmpty())
8303         continue;
8304       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8305       if (RecordLayout[FieldIndex])
8306         continue;
8307       RecordLayout[FieldIndex] = Base;
8308     }
8309     // Fill in all the fields.
8310     assert(!RD->isUnion() && "Unexpected union.");
8311     for (const auto *Field : RD->fields()) {
8312       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8313       // will fill in later.)
8314       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8315         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8316         RecordLayout[FieldIndex] = Field;
8317       }
8318     }
8319     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8320              &Data : RecordLayout) {
8321       if (Data.isNull())
8322         continue;
8323       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8324         getPlainLayout(Base, Layout, /*AsBase=*/true);
8325       else
8326         Layout.push_back(Data.get<const FieldDecl *>());
8327     }
8328   }
8329 
8330   /// Generate all the base pointers, section pointers, sizes, map types, and
8331   /// mappers for the extracted mappable expressions (all included in \a
8332   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8333   /// pair of the relevant declaration and index where it occurs is appended to
8334   /// the device pointers info array.
8335   void generateAllInfoForClauses(
8336       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8337       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8338           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8339     // We have to process the component lists that relate with the same
8340     // declaration in a single chunk so that we can generate the map flags
8341     // correctly. Therefore, we organize all lists in a map.
8342     enum MapKind { Present, Allocs, Other, Total };
8343     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8344                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8345         Info;
8346 
8347     // Helper function to fill the information map for the different supported
8348     // clauses.
8349     auto &&InfoGen =
8350         [&Info, &SkipVarSet](
8351             const ValueDecl *D, MapKind Kind,
8352             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8353             OpenMPMapClauseKind MapType,
8354             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8355             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8356             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8357             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8358           if (SkipVarSet.contains(D))
8359             return;
8360           auto It = Info.find(D);
8361           if (It == Info.end())
8362             It = Info
8363                      .insert(std::make_pair(
8364                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8365                      .first;
8366           It->second[Kind].emplace_back(
8367               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8368               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8369         };
8370 
8371     for (const auto *Cl : Clauses) {
8372       const auto *C = dyn_cast<OMPMapClause>(Cl);
8373       if (!C)
8374         continue;
8375       MapKind Kind = Other;
8376       if (!C->getMapTypeModifiers().empty() &&
8377           llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
8378             return K == OMPC_MAP_MODIFIER_present;
8379           }))
8380         Kind = Present;
8381       else if (C->getMapType() == OMPC_MAP_alloc)
8382         Kind = Allocs;
8383       const auto *EI = C->getVarRefs().begin();
8384       for (const auto L : C->component_lists()) {
8385         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8386         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8387                 C->getMapTypeModifiers(), llvm::None,
8388                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8389                 E);
8390         ++EI;
8391       }
8392     }
8393     for (const auto *Cl : Clauses) {
8394       const auto *C = dyn_cast<OMPToClause>(Cl);
8395       if (!C)
8396         continue;
8397       MapKind Kind = Other;
8398       if (!C->getMotionModifiers().empty() &&
8399           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8400             return K == OMPC_MOTION_MODIFIER_present;
8401           }))
8402         Kind = Present;
8403       const auto *EI = C->getVarRefs().begin();
8404       for (const auto L : C->component_lists()) {
8405         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8406                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8407                 C->isImplicit(), std::get<2>(L), *EI);
8408         ++EI;
8409       }
8410     }
8411     for (const auto *Cl : Clauses) {
8412       const auto *C = dyn_cast<OMPFromClause>(Cl);
8413       if (!C)
8414         continue;
8415       MapKind Kind = Other;
8416       if (!C->getMotionModifiers().empty() &&
8417           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8418             return K == OMPC_MOTION_MODIFIER_present;
8419           }))
8420         Kind = Present;
8421       const auto *EI = C->getVarRefs().begin();
8422       for (const auto L : C->component_lists()) {
8423         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8424                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8425                 C->isImplicit(), std::get<2>(L), *EI);
8426         ++EI;
8427       }
8428     }
8429 
8430     // Look at the use_device_ptr clause information and mark the existing map
8431     // entries as such. If there is no map information for an entry in the
8432     // use_device_ptr list, we create one with map type 'alloc' and zero size
8433     // section. It is the user fault if that was not mapped before. If there is
8434     // no map information and the pointer is a struct member, then we defer the
8435     // emission of that entry until the whole struct has been processed.
8436     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8437                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8438         DeferredInfo;
8439     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8440 
8441     for (const auto *Cl : Clauses) {
8442       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8443       if (!C)
8444         continue;
8445       for (const auto L : C->component_lists()) {
8446         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8447             std::get<1>(L);
8448         assert(!Components.empty() &&
8449                "Not expecting empty list of components!");
8450         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8451         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8452         const Expr *IE = Components.back().getAssociatedExpression();
8453         // If the first component is a member expression, we have to look into
8454         // 'this', which maps to null in the map of map information. Otherwise
8455         // look directly for the information.
8456         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8457 
8458         // We potentially have map information for this declaration already.
8459         // Look for the first set of components that refer to it.
8460         if (It != Info.end()) {
8461           bool Found = false;
8462           for (auto &Data : It->second) {
8463             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8464               return MI.Components.back().getAssociatedDeclaration() == VD;
8465             });
8466             // If we found a map entry, signal that the pointer has to be
8467             // returned and move on to the next declaration. Exclude cases where
8468             // the base pointer is mapped as array subscript, array section or
8469             // array shaping. The base address is passed as a pointer to base in
8470             // this case and cannot be used as a base for use_device_ptr list
8471             // item.
8472             if (CI != Data.end()) {
8473               auto PrevCI = std::next(CI->Components.rbegin());
8474               const auto *VarD = dyn_cast<VarDecl>(VD);
8475               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8476                   isa<MemberExpr>(IE) ||
8477                   !VD->getType().getNonReferenceType()->isPointerType() ||
8478                   PrevCI == CI->Components.rend() ||
8479                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8480                   VarD->hasLocalStorage()) {
8481                 CI->ReturnDevicePointer = true;
8482                 Found = true;
8483                 break;
8484               }
8485             }
8486           }
8487           if (Found)
8488             continue;
8489         }
8490 
8491         // We didn't find any match in our map information - generate a zero
8492         // size array section - if the pointer is a struct member we defer this
8493         // action until the whole struct has been processed.
8494         if (isa<MemberExpr>(IE)) {
8495           // Insert the pointer into Info to be processed by
8496           // generateInfoForComponentList. Because it is a member pointer
8497           // without a pointee, no entry will be generated for it, therefore
8498           // we need to generate one after the whole struct has been processed.
8499           // Nonetheless, generateInfoForComponentList must be called to take
8500           // the pointer into account for the calculation of the range of the
8501           // partial struct.
8502           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8503                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8504                   nullptr);
8505           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8506         } else {
8507           llvm::Value *Ptr =
8508               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8509           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8510           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8511           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8512           UseDevicePtrCombinedInfo.Sizes.push_back(
8513               llvm::Constant::getNullValue(CGF.Int64Ty));
8514           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8515           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8516         }
8517       }
8518     }
8519 
8520     // Look at the use_device_addr clause information and mark the existing map
8521     // entries as such. If there is no map information for an entry in the
8522     // use_device_addr list, we create one with map type 'alloc' and zero size
8523     // section. It is the user fault if that was not mapped before. If there is
8524     // no map information and the pointer is a struct member, then we defer the
8525     // emission of that entry until the whole struct has been processed.
8526     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8527     for (const auto *Cl : Clauses) {
8528       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8529       if (!C)
8530         continue;
8531       for (const auto L : C->component_lists()) {
8532         assert(!std::get<1>(L).empty() &&
8533                "Not expecting empty list of components!");
8534         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8535         if (!Processed.insert(VD).second)
8536           continue;
8537         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8538         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8539         // If the first component is a member expression, we have to look into
8540         // 'this', which maps to null in the map of map information. Otherwise
8541         // look directly for the information.
8542         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8543 
8544         // We potentially have map information for this declaration already.
8545         // Look for the first set of components that refer to it.
8546         if (It != Info.end()) {
8547           bool Found = false;
8548           for (auto &Data : It->second) {
8549             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8550               return MI.Components.back().getAssociatedDeclaration() == VD;
8551             });
8552             // If we found a map entry, signal that the pointer has to be
8553             // returned and move on to the next declaration.
8554             if (CI != Data.end()) {
8555               CI->ReturnDevicePointer = true;
8556               Found = true;
8557               break;
8558             }
8559           }
8560           if (Found)
8561             continue;
8562         }
8563 
8564         // We didn't find any match in our map information - generate a zero
8565         // size array section - if the pointer is a struct member we defer this
8566         // action until the whole struct has been processed.
8567         if (isa<MemberExpr>(IE)) {
8568           // Insert the pointer into Info to be processed by
8569           // generateInfoForComponentList. Because it is a member pointer
8570           // without a pointee, no entry will be generated for it, therefore
8571           // we need to generate one after the whole struct has been processed.
8572           // Nonetheless, generateInfoForComponentList must be called to take
8573           // the pointer into account for the calculation of the range of the
8574           // partial struct.
8575           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8576                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8577                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8578           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8579         } else {
8580           llvm::Value *Ptr;
8581           if (IE->isGLValue())
8582             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8583           else
8584             Ptr = CGF.EmitScalarExpr(IE);
8585           CombinedInfo.Exprs.push_back(VD);
8586           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8587           CombinedInfo.Pointers.push_back(Ptr);
8588           CombinedInfo.Sizes.push_back(
8589               llvm::Constant::getNullValue(CGF.Int64Ty));
8590           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8591           CombinedInfo.Mappers.push_back(nullptr);
8592         }
8593       }
8594     }
8595 
8596     for (const auto &Data : Info) {
8597       StructRangeInfoTy PartialStruct;
8598       // Temporary generated information.
8599       MapCombinedInfoTy CurInfo;
8600       const Decl *D = Data.first;
8601       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8602       for (const auto &M : Data.second) {
8603         for (const MapInfo &L : M) {
8604           assert(!L.Components.empty() &&
8605                  "Not expecting declaration with no component lists.");
8606 
8607           // Remember the current base pointer index.
8608           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8609           CurInfo.NonContigInfo.IsNonContiguous =
8610               L.Components.back().isNonContiguous();
8611           generateInfoForComponentList(
8612               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8613               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8614               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8615 
8616           // If this entry relates with a device pointer, set the relevant
8617           // declaration and add the 'return pointer' flag.
8618           if (L.ReturnDevicePointer) {
8619             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8620                    "Unexpected number of mapped base pointers.");
8621 
8622             const ValueDecl *RelevantVD =
8623                 L.Components.back().getAssociatedDeclaration();
8624             assert(RelevantVD &&
8625                    "No relevant declaration related with device pointer??");
8626 
8627             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8628                 RelevantVD);
8629             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8630           }
8631         }
8632       }
8633 
8634       // Append any pending zero-length pointers which are struct members and
8635       // used with use_device_ptr or use_device_addr.
8636       auto CI = DeferredInfo.find(Data.first);
8637       if (CI != DeferredInfo.end()) {
8638         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8639           llvm::Value *BasePtr;
8640           llvm::Value *Ptr;
8641           if (L.ForDeviceAddr) {
8642             if (L.IE->isGLValue())
8643               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8644             else
8645               Ptr = this->CGF.EmitScalarExpr(L.IE);
8646             BasePtr = Ptr;
8647             // Entry is RETURN_PARAM. Also, set the placeholder value
8648             // MEMBER_OF=FFFF so that the entry is later updated with the
8649             // correct value of MEMBER_OF.
8650             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8651           } else {
8652             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8653             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8654                                              L.IE->getExprLoc());
8655             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8656             // placeholder value MEMBER_OF=FFFF so that the entry is later
8657             // updated with the correct value of MEMBER_OF.
8658             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8659                                     OMP_MAP_MEMBER_OF);
8660           }
8661           CurInfo.Exprs.push_back(L.VD);
8662           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8663           CurInfo.Pointers.push_back(Ptr);
8664           CurInfo.Sizes.push_back(
8665               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8666           CurInfo.Mappers.push_back(nullptr);
8667         }
8668       }
8669       // If there is an entry in PartialStruct it means we have a struct with
8670       // individual members mapped. Emit an extra combined entry.
8671       if (PartialStruct.Base.isValid()) {
8672         CurInfo.NonContigInfo.Dims.push_back(0);
8673         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8674       }
8675 
8676       // We need to append the results of this capture to what we already
8677       // have.
8678       CombinedInfo.append(CurInfo);
8679     }
8680     // Append data for use_device_ptr clauses.
8681     CombinedInfo.append(UseDevicePtrCombinedInfo);
8682   }
8683 
8684 public:
8685   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8686       : CurDir(&Dir), CGF(CGF) {
8687     // Extract firstprivate clause information.
8688     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8689       for (const auto *D : C->varlists())
8690         FirstPrivateDecls.try_emplace(
8691             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8692     // Extract implicit firstprivates from uses_allocators clauses.
8693     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8694       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8695         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8696         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8697           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8698                                         /*Implicit=*/true);
8699         else if (const auto *VD = dyn_cast<VarDecl>(
8700                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8701                          ->getDecl()))
8702           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8703       }
8704     }
8705     // Extract device pointer clause information.
8706     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8707       for (auto L : C->component_lists())
8708         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8709   }
8710 
  /// Constructor for the declare mapper directive. Only \a CurDir is set; the
  /// clause-derived maps populated by the executable-directive constructor
  /// (firstprivates, device pointers) are left empty.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8714 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// \param CombinedInfo [out] receives one extra entry that covers the whole
  ///        mapped range of the struct.
  /// \param CurTypes [in,out] map flags of the entries already generated for
  ///        the individual members; rewritten to be MEMBER_OF the new entry.
  /// \param PartialStruct base address and lowest/highest mapped elements of
  ///        the struct.
  /// \param VD the mapped declaration, if any.
  /// \param NotTargetParams if true, the combined entry is emitted without the
  ///        TARGET_PARAM flag.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is not a struct member and not an array section
    // needs no combined entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // When the complete record is mapped, use the record's own address for
    // both bounds; the "highest + 1" GEP below then spans the whole record.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
8772 
8773   /// Generate all the base pointers, section pointers, sizes, map types, and
8774   /// mappers for the extracted mappable expressions (all included in \a
8775   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8776   /// pair of the relevant declaration and index where it occurs is appended to
8777   /// the device pointers info array.
8778   void generateAllInfo(
8779       MapCombinedInfoTy &CombinedInfo,
8780       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8781           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8782     assert(CurDir.is<const OMPExecutableDirective *>() &&
8783            "Expect a executable directive");
8784     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8785     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8786   }
8787 
8788   /// Generate all the base pointers, section pointers, sizes, map types, and
8789   /// mappers for the extracted map clauses of user-defined mapper (all included
8790   /// in \a CombinedInfo).
8791   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8792     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8793            "Expect a declare mapper directive");
8794     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8795     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8796   }
8797 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// \param VD the captured declaration; only handled here if its (non-
  ///        reference) type is a lambda closure class.
  /// \param Arg the pointer through which the lambda object is mapped.
  /// \param CombinedInfo [out] receives one entry per by-reference (or
  ///        pointer) capture, plus one for a captured 'this' if present.
  /// \param LambdaPointers [out] maps each emitted field address back to the
  ///        lambda's own address, for later MEMBER_OF adjustment in
  ///        adjustMemberOfForLambdaCaptures().
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Nothing to do unless VD is (a reference to) a lambda closure type.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // Map the captured 'this' pointer, if any, as a pointer-sized
    // PTR_AND_OBJ member of the lambda object.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers are mapped here.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object with its full size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer: map the pointer value itself with zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
8864 
8865   /// Set correct indices for lambdas captures.
8866   void adjustMemberOfForLambdaCaptures(
8867       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8868       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8869       MapFlagsArrayTy &Types) const {
8870     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8871       // Set correct member_of idx for all implicit lambda captures.
8872       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8873                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8874         continue;
8875       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8876       assert(BasePtr && "Unable to find base lambda address.");
8877       int TgtIdx = -1;
8878       for (unsigned J = I; J > 0; --J) {
8879         unsigned Idx = J - 1;
8880         if (Pointers[Idx] != BasePtr)
8881           continue;
8882         TgtIdx = Idx;
8883         break;
8884       }
8885       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8886       // All other current entries will be MEMBER_OF the combined entry
8887       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8888       // 0xFFFF in the MEMBER_OF field).
8889       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8890       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8891     }
8892   }
8893 
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  ///
  /// \param Cap the capture to generate map information for; must not capture
  ///        a variable array type.
  /// \param Arg the value the capture is passed through.
  /// \param CombinedInfo [out] receives the generated entries.
  /// \param PartialStruct [out] filled in when individual members of a struct
  ///        are mapped; the caller emits the combined entry for it.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // 'this' captures have no associated declaration; otherwise use the
    // canonical declaration of the captured variable.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // Collect every map-clause component list that refers to this capture's
    // declaration, together with its map type, modifiers, mapper and the
    // expression it came from.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Stable-sort so that lists with the 'present' modifier, and 'alloc'
    // maps, are ordered before the others.
    // NOTE(review): HasPresent is computed from LHS's modifiers while
    // HasAllocs is computed from RHS's map type (and vice versa for the *R
    // variants) — confirm this LHS/RHS mix is intentional.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent = !MapModifiers.empty() &&
                        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                          return K == clang::OMPC_MAP_MODIFIER_present;
                        });
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          !MapModifiers.empty() &&
          llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
            return K == clang::OMPC_MAP_MODIFIER_present;
          });
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    // Compare every pair of component lists: walk both from the innermost
    // component outwards and stop at the first mismatch.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointer/array types down to the underlying record type so its
      // field layout can be used as the sort order.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    // Order each base's overlapped lists by field declaration order.
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // Fields live in different (base) records: whichever occurs first
            // in the flattened layout is less.
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }
9131 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// \param CI the capture (this / by-copy / by-reference).
  /// \param RI the field of the capture record corresponding to \a CI.
  /// \param CV the captured value passed to the outlined function.
  /// \param CombinedInfo [out] receives exactly one entry, flagged as a
  ///        target parameter (and implicit where applicable).
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this': map the pointee object with size of the class, to and from.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A firstprivate clause may override the implicitness of this capture.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer captured by reference: the mapped pointer is
        // the loaded pointee address, not the reference itself.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9204 };
9205 } // anonymous namespace
9206 
9207 static void emitNonContiguousDescriptor(
9208     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9209     CGOpenMPRuntime::TargetDataInfo &Info) {
9210   CodeGenModule &CGM = CGF.CGM;
9211   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9212       &NonContigInfo = CombinedInfo.NonContigInfo;
9213 
9214   // Build an array of struct descriptor_dim and then assign it to
9215   // offload_args.
9216   //
9217   // struct descriptor_dim {
9218   //  uint64_t offset;
9219   //  uint64_t count;
9220   //  uint64_t stride
9221   // };
9222   ASTContext &C = CGF.getContext();
9223   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9224   RecordDecl *RD;
9225   RD = C.buildImplicitRecord("descriptor_dim");
9226   RD->startDefinition();
9227   addFieldToRecordDecl(C, RD, Int64Ty);
9228   addFieldToRecordDecl(C, RD, Int64Ty);
9229   addFieldToRecordDecl(C, RD, Int64Ty);
9230   RD->completeDefinition();
9231   QualType DimTy = C.getRecordType(RD);
9232 
9233   enum { OffsetFD = 0, CountFD, StrideFD };
9234   // We need two index variable here since the size of "Dims" is the same as the
9235   // size of Components, however, the size of offset, count, and stride is equal
9236   // to the size of base declaration that is non-contiguous.
9237   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9238     // Skip emitting ir if dimension size is 1 since it cannot be
9239     // non-contiguous.
9240     if (NonContigInfo.Dims[I] == 1)
9241       continue;
9242     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9243     QualType ArrayTy =
9244         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9245     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9246     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9247       unsigned RevIdx = EE - II - 1;
9248       LValue DimsLVal = CGF.MakeAddrLValue(
9249           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9250       // Offset
9251       LValue OffsetLVal = CGF.EmitLValueForField(
9252           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9253       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9254       // Count
9255       LValue CountLVal = CGF.EmitLValueForField(
9256           DimsLVal, *std::next(RD->field_begin(), CountFD));
9257       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9258       // Stride
9259       LValue StrideLVal = CGF.EmitLValueForField(
9260           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9261       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9262     }
9263     // args[I] = &dims
9264     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9265         DimsAddr, CGM.Int8PtrTy);
9266     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9267         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9268         Info.PointersArray, 0, I);
9269     Address PAddr(P, CGF.getPointerAlign());
9270     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9271     ++L;
9272   }
9273 }
9274 
9275 /// Emit a string constant containing the names of the values mapped to the
9276 /// offloading runtime library.
9277 llvm::Constant *
9278 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9279                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9280   llvm::Constant *SrcLocStr;
9281   if (!MapExprs.getMapDecl()) {
9282     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
9283   } else {
9284     std::string ExprName = "";
9285     if (MapExprs.getMapExpr()) {
9286       PrintingPolicy P(CGF.getContext().getLangOpts());
9287       llvm::raw_string_ostream OS(ExprName);
9288       MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9289       OS.flush();
9290     } else {
9291       ExprName = MapExprs.getMapDecl()->getNameAsString();
9292     }
9293 
9294     SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
9295     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9296     const char *FileName = PLoc.getFilename();
9297     unsigned Line = PLoc.getLine();
9298     unsigned Column = PLoc.getColumn();
9299     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
9300                                                 Line, Column);
9301   }
9302   return SrcLocStr;
9303 }
9304 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Fills in Info's BasePointersArray/PointersArray/SizesArray/MapTypesArray/
/// MapNamesArray/MappersArray from \p CombinedInfo, emitting per-entry stores
/// for values only known at runtime and constant globals otherwise. When
/// \p IsNonContiguous is set and non-contiguous entries exist, also emits the
/// dimension descriptors via emitNonContiguousDescriptor.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  // With no captured pointers the Info arrays stay null (cleared above).
  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Base pointers, pointers, and mappers are always filled with runtime
    // stores, so they live in stack temporaries.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          // For a non-contiguous entry the size slot carries the number of
          // dimensions; the actual extents are emitted later in the
          // descriptor_dim array.
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    // Copy the flag values into a plain uint64_t vector for the OMPBuilder
    // API (and for the possible end-of-region rewrite below).
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      // No debug info: pass a null i8* instead of a names array.
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // One location/name string constant per mapped expression.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          // Strip PRESENT for the end call; only emit a second array if at
          // least one entry actually changed.
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Store each entry's base pointer, pointer, (runtime) size, and mapper
    // into its slot of the corresponding array.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember where a use_device_ptr capture was stored so the region body
      // can load the translated device address.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Sizes are only stored here when they are not all constants (otherwise
      // the constant global created above already holds them).
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // Non-contiguous descriptors are only needed when requested, present, and
  // there is at least one mapped pointer.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9479 
namespace {
/// Optional flags for emitOffloadingArraysArgument.
struct ArgumentsOptions {
  /// When true, emit the map-type array meant for the end of the region
  /// rather than its beginning.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool IsForEndCall) : ForEndCall(IsForEndCall) {}
};
} // namespace
9488 
9489 /// Emit the arguments to be passed to the runtime library based on the
9490 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9491 /// ForEndCall, emit map types to be passed for the end of the region instead of
9492 /// the beginning.
9493 static void emitOffloadingArraysArgument(
9494     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9495     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9496     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9497     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9498     const ArgumentsOptions &Options = ArgumentsOptions()) {
9499   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9500          "expected region end call to runtime only when end call is separate");
9501   CodeGenModule &CGM = CGF.CGM;
9502   if (Info.NumberOfPtrs) {
9503     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9504         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9505         Info.BasePointersArray,
9506         /*Idx0=*/0, /*Idx1=*/0);
9507     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9508         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9509         Info.PointersArray,
9510         /*Idx0=*/0,
9511         /*Idx1=*/0);
9512     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9513         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9514         /*Idx0=*/0, /*Idx1=*/0);
9515     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9516         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9517         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9518                                                     : Info.MapTypesArray,
9519         /*Idx0=*/0,
9520         /*Idx1=*/0);
9521 
9522     // Only emit the mapper information arrays if debug information is
9523     // requested.
9524     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9525       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9526     else
9527       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9528           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9529           Info.MapNamesArray,
9530           /*Idx0=*/0,
9531           /*Idx1=*/0);
9532     // If there is no user-defined mapper, set the mapper array to nullptr to
9533     // avoid an unnecessary data privatization
9534     if (!Info.HasMapper)
9535       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9536     else
9537       MappersArrayArg =
9538           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9539   } else {
9540     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9541     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9542     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9543     MapTypesArrayArg =
9544         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9545     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9546     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9547   }
9548 }
9549 
/// Check for inner distribute directive.
///
/// Given a target-family directive \p D, inspects its (single) nested child
/// statement for a 'distribute' directive — either directly nested, or nested
/// below an intervening 'teams' region when \p D is plain 'target' — and
/// returns it; returns nullptr when no such directive exists.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // Reduce the captured body to its single effective child, if any.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may wrap 'distribute' directly...
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      // ...or via an intermediate 'teams' region: look one level deeper.
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      // 'target teams' can only contain 'distribute' directly.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These target combinations cannot legally contain 'distribute'.
      return nullptr;
    // All remaining kinds are either combined directives that already include
    // 'distribute' or are not valid enclosing directives for this query.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9658 
9659 /// Emit the user-defined mapper function. The code generation follows the
9660 /// pattern in the example below.
9661 /// \code
9662 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9663 ///                                           void *base, void *begin,
9664 ///                                           int64_t size, int64_t type,
9665 ///                                           void *name = nullptr) {
9666 ///   // Allocate space for an array section first or add a base/begin for
9667 ///   // pointer dereference.
9668 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9669 ///       !maptype.IsDelete)
9670 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9671 ///                                 size*sizeof(Ty), clearToFromMember(type));
9672 ///   // Map members.
9673 ///   for (unsigned i = 0; i < size; i++) {
9674 ///     // For each component specified by this mapper:
9675 ///     for (auto c : begin[i]->all_components) {
9676 ///       if (c.hasMapper())
9677 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9678 ///                       c.arg_type, c.arg_name);
9679 ///       else
9680 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9681 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9682 ///                                     c.arg_name);
9683 ///     }
9684 ///   }
9685 ///   // Delete the array section.
9686 ///   if (size > 1 && maptype.IsDelete)
9687 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9688 ///                                 size*sizeof(Ty), clearToFromMember(type));
9689 /// }
9690 /// \endcode
9691 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9692                                             CodeGenFunction *CGF) {
9693   if (UDMMap.count(D) > 0)
9694     return;
9695   ASTContext &C = CGM.getContext();
9696   QualType Ty = D->getType();
9697   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9698   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9699   auto *MapperVarDecl =
9700       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9701   SourceLocation Loc = D->getLocation();
9702   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9703 
9704   // Prepare mapper function arguments and attributes.
9705   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9706                               C.VoidPtrTy, ImplicitParamDecl::Other);
9707   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9708                             ImplicitParamDecl::Other);
9709   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9710                              C.VoidPtrTy, ImplicitParamDecl::Other);
9711   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9712                             ImplicitParamDecl::Other);
9713   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9714                             ImplicitParamDecl::Other);
9715   ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9716                             ImplicitParamDecl::Other);
9717   FunctionArgList Args;
9718   Args.push_back(&HandleArg);
9719   Args.push_back(&BaseArg);
9720   Args.push_back(&BeginArg);
9721   Args.push_back(&SizeArg);
9722   Args.push_back(&TypeArg);
9723   Args.push_back(&NameArg);
9724   const CGFunctionInfo &FnInfo =
9725       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9726   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9727   SmallString<64> TyStr;
9728   llvm::raw_svector_ostream Out(TyStr);
9729   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9730   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9731   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9732                                     Name, &CGM.getModule());
9733   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9734   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9735   // Start the mapper function code generation.
9736   CodeGenFunction MapperCGF(CGM);
9737   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9738   // Compute the starting and end addresses of array elements.
9739   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9740       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9741       C.getPointerType(Int64Ty), Loc);
9742   // Prepare common arguments for array initiation and deletion.
9743   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9744       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9745       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9746   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9747       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9748       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9749   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9750       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9751       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9752   // Convert the size in bytes into the number of array elements.
9753   Size = MapperCGF.Builder.CreateExactUDiv(
9754       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9755   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9756       BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9757   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
9758       PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
9759   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9760       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9761       C.getPointerType(Int64Ty), Loc);
9762   llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9763       MapperCGF.GetAddrOfLocalVar(&NameArg),
9764       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9765 
9766   // Emit array initiation if this is an array section and \p MapType indicates
9767   // that memory allocation is required.
9768   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9769   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9770                              MapName, ElementSize, HeadBB, /*IsInit=*/true);
9771 
9772   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9773 
9774   // Emit the loop header block.
9775   MapperCGF.EmitBlock(HeadBB);
9776   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9777   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9778   // Evaluate whether the initial condition is satisfied.
9779   llvm::Value *IsEmpty =
9780       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9781   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9782   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9783 
9784   // Emit the loop body block.
9785   MapperCGF.EmitBlock(BodyBB);
9786   llvm::BasicBlock *LastBB = BodyBB;
9787   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9788       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9789   PtrPHI->addIncoming(PtrBegin, EntryBB);
9790   Address PtrCurrent =
9791       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9792                           .getAlignment()
9793                           .alignmentOfArrayElement(ElementSize));
9794   // Privatize the declared variable of mapper to be the current array element.
9795   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9796   Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
9797   (void)Scope.Privatize();
9798 
9799   // Get map clause information. Fill up the arrays with all mapped variables.
9800   MappableExprsHandler::MapCombinedInfoTy Info;
9801   MappableExprsHandler MEHandler(*D, MapperCGF);
9802   MEHandler.generateAllInfoForMapper(Info);
9803 
9804   // Call the runtime API __tgt_mapper_num_components to get the number of
9805   // pre-existing components.
9806   llvm::Value *OffloadingArgs[] = {Handle};
9807   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9808       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9809                                             OMPRTL___tgt_mapper_num_components),
9810       OffloadingArgs);
9811   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9812       PreviousSize,
9813       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9814 
9815   // Fill up the runtime mapper handle for all components.
9816   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9817     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9818         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9819     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9820         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9821     llvm::Value *CurSizeArg = Info.Sizes[I];
9822     llvm::Value *CurNameArg =
9823         (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9824             ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9825             : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9826 
9827     // Extract the MEMBER_OF field from the map type.
9828     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
9829     llvm::Value *MemberMapType =
9830         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9831 
9832     // Combine the map type inherited from user-defined mapper with that
9833     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9834     // bits of the \a MapType, which is the input argument of the mapper
9835     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9836     // bits of MemberMapType.
9837     // [OpenMP 5.0], 1.2.6. map-type decay.
9838     //        | alloc |  to   | from  | tofrom | release | delete
9839     // ----------------------------------------------------------
9840     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9841     // to     | alloc |  to   | alloc |   to   | release | delete
9842     // from   | alloc | alloc | from  |  from  | release | delete
9843     // tofrom | alloc |  to   | from  | tofrom | release | delete
9844     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9845         MapType,
9846         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9847                                    MappableExprsHandler::OMP_MAP_FROM));
9848     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9849     llvm::BasicBlock *AllocElseBB =
9850         MapperCGF.createBasicBlock("omp.type.alloc.else");
9851     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9852     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9853     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9854     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9855     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9856     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9857     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9858     MapperCGF.EmitBlock(AllocBB);
9859     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9860         MemberMapType,
9861         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9862                                      MappableExprsHandler::OMP_MAP_FROM)));
9863     MapperCGF.Builder.CreateBr(EndBB);
9864     MapperCGF.EmitBlock(AllocElseBB);
9865     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9866         LeftToFrom,
9867         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9868     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9869     // In case of to, clear OMP_MAP_FROM.
9870     MapperCGF.EmitBlock(ToBB);
9871     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9872         MemberMapType,
9873         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9874     MapperCGF.Builder.CreateBr(EndBB);
9875     MapperCGF.EmitBlock(ToElseBB);
9876     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9877         LeftToFrom,
9878         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9879     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9880     // In case of from, clear OMP_MAP_TO.
9881     MapperCGF.EmitBlock(FromBB);
9882     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9883         MemberMapType,
9884         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9885     // In case of tofrom, do nothing.
9886     MapperCGF.EmitBlock(EndBB);
9887     LastBB = EndBB;
9888     llvm::PHINode *CurMapType =
9889         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9890     CurMapType->addIncoming(AllocMapType, AllocBB);
9891     CurMapType->addIncoming(ToMapType, ToBB);
9892     CurMapType->addIncoming(FromMapType, FromBB);
9893     CurMapType->addIncoming(MemberMapType, ToElseBB);
9894 
9895     llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
9896                                      CurSizeArg, CurMapType, CurNameArg};
9897     if (Info.Mappers[I]) {
9898       // Call the corresponding mapper function.
9899       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9900           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9901       assert(MapperFunc && "Expect a valid mapper function is available.");
9902       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9903     } else {
9904       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9905       // data structure.
9906       MapperCGF.EmitRuntimeCall(
9907           OMPBuilder.getOrCreateRuntimeFunction(
9908               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9909           OffloadingArgs);
9910     }
9911   }
9912 
9913   // Update the pointer to point to the next element that needs to be mapped,
9914   // and check whether we have mapped all elements.
9915   llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
9916   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9917       ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9918   PtrPHI->addIncoming(PtrNext, LastBB);
9919   llvm::Value *IsDone =
9920       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9921   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9922   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9923 
9924   MapperCGF.EmitBlock(ExitBB);
9925   // Emit array deletion if this is an array section and \p MapType indicates
9926   // that deletion is required.
9927   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9928                              MapName, ElementSize, DoneBB, /*IsInit=*/false);
9929 
9930   // Emit the function exit block.
9931   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9932   MapperCGF.FinishFunction();
9933   UDMMap.try_emplace(D, Fn);
9934   if (CGF) {
9935     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9936     Decls.second.push_back(D);
9937   }
9938 }
9939 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  // Basic block names are suffixed so init and delete regions are
  // distinguishable in the generated IR.
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // An array section is assumed when more than one element is mapped.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // Initialization is emitted either for an array section, or for a
    // PTR_AND_OBJ entry whose base differs from its begin pointer — and only
    // when deletion is NOT requested (DeleteBit must be clear).
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // Deletion is emitted only for an array section and only when deletion is
    // requested (DeleteBit must be set).
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
10008 
10009 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10010     const OMPDeclareMapperDecl *D) {
10011   auto I = UDMMap.find(D);
10012   if (I != UDMMap.end())
10013     return I->second;
10014   emitUserDefinedMapper(D);
10015   return UDMMap.lookup(D);
10016 }
10017 
/// If \p D is a loop-based teams-distribute target directive (or contains a
/// nested one), emit a call to __kmpc_push_target_tripcount_mapper that
/// records the loop trip count with the offload runtime for device
/// \p DeviceID. Does nothing when no such loop directive is found or the trip
/// count cannot be computed by \p SizeEmitter.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  // Emit the runtime call only when SizeEmitter can produce a trip count for
  // the loop directive.
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}
10045 
/// Emit the offloading sequence for target directive \p D: populate the
/// offloading argument arrays, launch the device region identified by
/// \p OutlinedFnID through the __tgt_target*_mapper runtime entry points, and
/// fall back to calling the host version \p OutlinedFn when offloading is not
/// possible (no function ID, an 'ancestor' device modifier, a false \p IfCond,
/// or a failed runtime launch).
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // depend/nowait clauses require wrapping the target region in an outer
  // task, which is emitted via EmitOMPTargetTaskBasedDirective below.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo/MapTypesArray/MapNamesArray are filled in by TargetThenGen
  // before ThenGen (which captures them by reference) is executed.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer(),
          NumTeams,
          NumThreads};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer()};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Builds the offloading arrays for all captures and map clauses, then runs
  // ThenGen (directly, or wrapped in an outer task if required).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Publish the emitted arrays through the variables that ThenGen captures
    // by reference.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
10373 
/// Recursively scan statement \p S for OpenMP target execution directives and
/// emit a device function (named relative to \p ParentName) for each target
/// region found. Non-target directives and other statements are traversed
/// into; lambdas are scanned through their bodies.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    // The unique (device, file, line) triple identifies this target region
    // entry across host and device compilations.
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter matching the directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target execution directives, so
    // reaching them here indicates a bug in the dispatch above.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Non-target OpenMP directive: scan its associated statement, if any.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10523 
10524 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10525   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10526       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10527   if (!DevTy)
10528     return false;
10529   // Do not emit device_type(nohost) functions for the host.
10530   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10531     return true;
10532   // Do not emit device_type(host) functions for the device.
10533   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10534     return true;
10535   return false;
10536 }
10537 
10538 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10539   // If emitting code for the host, we do not process FD here. Instead we do
10540   // the normal code generation.
10541   if (!CGM.getLangOpts().OpenMPIsDevice) {
10542     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10543       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10544                                   CGM.getLangOpts().OpenMPIsDevice))
10545         return true;
10546     return false;
10547   }
10548 
10549   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10550   // Try to detect target regions in the function.
10551   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10552     StringRef Name = CGM.getMangledName(GD);
10553     scanForTargetRegionsFunctions(FD->getBody(), Name);
10554     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10555                                 CGM.getLangOpts().OpenMPIsDevice))
10556       return true;
10557   }
10558 
10559   // Do not to emit function if it is not marked as declare target.
10560   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10561          AlreadyEmittedTargetDecls.count(VD) == 0;
10562 }
10563 
10564 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10565   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10566                               CGM.getLangOpts().OpenMPIsDevice))
10567     return true;
10568 
10569   if (!CGM.getLangOpts().OpenMPIsDevice)
10570     return false;
10571 
10572   // Check if there are Ctors/Dtors in this declaration and look for target
10573   // regions in it. We use the complete variant to produce the kernel name
10574   // mangling.
10575   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10576   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10577     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10578       StringRef ParentName =
10579           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10580       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10581     }
10582     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10583       StringRef ParentName =
10584           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10585       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10586     }
10587   }
10588 
10589   // Do not to emit variable if it is not marked as declare target.
10590   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10591       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10592           cast<VarDecl>(GD.getDecl()));
10593   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10594       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10595        HasRequiresUnifiedSharedMemory)) {
10596     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10597     return true;
10598   }
10599   return false;
10600 }
10601 
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register when no offload targets are configured and this is a
  // host-only compilation.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    // Plain declare target 'to': the variable itself is the entry.
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration only: size is unknown in this TU, record zero.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        // Emit an internal constant "<name>_ref" pointing at the variable and
        // mark it compiler-used so the optimizer cannot strip the original.
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    // 'link' entries (and 'to' entries under unified shared memory) are
    // registered through a pointer-sized indirection variable.
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device the entry is keyed by the emitted symbol name only; no
      // address is recorded.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
10683 
10684 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10685   if (isa<FunctionDecl>(GD.getDecl()) ||
10686       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10687     return emitTargetFunctions(GD);
10688 
10689   return emitTargetGlobalVariable(GD);
10690 }
10691 
10692 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10693   for (const VarDecl *VD : DeferredGlobalVariables) {
10694     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10695         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10696     if (!Res)
10697       continue;
10698     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10699         !HasRequiresUnifiedSharedMemory) {
10700       CGM.EmitGlobal(VD);
10701     } else {
10702       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10703               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10704                HasRequiresUnifiedSharedMemory)) &&
10705              "Expected link clause or to clause with unified memory.");
10706       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10707     }
10708   }
10709 }
10710 
10711 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10712     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10713   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10714          " Expected target-based directive.");
10715 }
10716 
10717 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10718   for (const OMPClause *Clause : D->clauselists()) {
10719     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10720       HasRequiresUnifiedSharedMemory = true;
10721     } else if (const auto *AC =
10722                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10723       switch (AC->getAtomicDefaultMemOrderKind()) {
10724       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10725         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10726         break;
10727       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10728         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10729         break;
10730       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10731         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10732         break;
10733       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10734         break;
10735       }
10736     }
10737   }
10738 }
10739 
// Returns the default atomic ordering recorded from a 'requires
// atomic_default_mem_order' clause (see processRequiresDirective).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10743 
10744 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10745                                                        LangAS &AS) {
10746   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10747     return false;
10748   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10749   switch(A->getAllocatorType()) {
10750   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10751   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10752   // Not supported, fallback to the default mem space.
10753   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10754   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10755   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10756   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10757   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10758   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10759   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10760     AS = LangAS::Default;
10761     return true;
10762   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10763     llvm_unreachable("Expected predefined allocator for the variables with the "
10764                      "static storage.");
10765   }
10766   return false;
10767 }
10768 
// Returns true when a 'requires unified_shared_memory' clause was seen
// (see processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10772 
10773 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10774     CodeGenModule &CGM)
10775     : CGM(CGM) {
10776   if (CGM.getLangOpts().OpenMPIsDevice) {
10777     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10778     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10779   }
10780 }
10781 
10782 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10783   if (CGM.getLangOpts().OpenMPIsDevice)
10784     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10785 }
10786 
10787 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10788   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10789     return true;
10790 
10791   const auto *D = cast<FunctionDecl>(GD.getDecl());
10792   // Do not to emit function if it is marked as declare target as it was already
10793   // emitted.
10794   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10795     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10796       if (auto *F = dyn_cast_or_null<llvm::Function>(
10797               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10798         return !F->isDeclaration();
10799       return false;
10800     }
10801     return true;
10802   }
10803 
10804   return !AlreadyEmittedTargetDecls.insert(D).second;
10805 }
10806 
// Builds (host side only) the constructor-like function that forwards the
// 'requires' clause flags to the runtime via __tgt_register_requires.
// Returns nullptr when registration is unnecessary.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    // Scope the CodeGenFunction so FinishFunction runs before we return.
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Emit the call: __tgt_register_requires(Flags).
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
10848 
10849 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10850                                     const OMPExecutableDirective &D,
10851                                     SourceLocation Loc,
10852                                     llvm::Function *OutlinedFn,
10853                                     ArrayRef<llvm::Value *> CapturedVars) {
10854   if (!CGF.HaveInsertPoint())
10855     return;
10856 
10857   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10858   CodeGenFunction::RunCleanupsScope Scope(CGF);
10859 
10860   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10861   llvm::Value *Args[] = {
10862       RTLoc,
10863       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10864       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10865   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10866   RealArgs.append(std::begin(Args), std::end(Args));
10867   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10868 
10869   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10870       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10871   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10872 }
10873 
10874 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10875                                          const Expr *NumTeams,
10876                                          const Expr *ThreadLimit,
10877                                          SourceLocation Loc) {
10878   if (!CGF.HaveInsertPoint())
10879     return;
10880 
10881   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10882 
10883   llvm::Value *NumTeamsVal =
10884       NumTeams
10885           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10886                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10887           : CGF.Builder.getInt32(0);
10888 
10889   llvm::Value *ThreadLimitVal =
10890       ThreadLimit
10891           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10892                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10893           : CGF.Builder.getInt32(0);
10894 
10895   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10896   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10897                                      ThreadLimitVal};
10898   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10899                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10900                       PushNumTeamsArgs);
10901 }
10902 
// Emits the begin/end runtime calls for an 'omp target data' region,
// honoring the optional if/device clauses and duplicating the body when
// device pointer privatization requires it.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // __tgt_target_data_begin_mapper(loc, device, n, base, ptrs, sizes,
    // types, names, mappers).
    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
11056 
// Emits the single runtime call backing a standalone data-motion directive
// ('target enter data', 'target exit data', or 'target update'), honoring
// the optional if/device/nowait/depend clauses.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // Captured by reference: TargetThenGen fills these in before ThenGen runs.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All other directive kinds are listed explicitly (rather than relying on
    // 'default') so that adding a new directive forces a review here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Builds the offloading arrays and then runs ThenGen, either inline or
  // wrapped in a task when depend/nowait clauses require one.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    // With an if clause the call is guarded; nothing happens on the else path.
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11235 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    // Defaults to Vector, i.e. no linear/uniform clause applies.
    ParamKindTy Kind = Vector;
    // Stride for linear parameters, or the clause argument value.
    llvm::APSInt StrideOrArg;
    // Alignment from an 'aligned' clause, if any.
    llvm::APSInt Alignment;
  };
} // namespace
11246 
11247 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11248                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11249   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11250   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11251   // of that clause. The VLEN value must be power of 2.
11252   // In other case the notion of the function`s "characteristic data type" (CDT)
11253   // is used to compute the vector length.
11254   // CDT is defined in the following order:
11255   //   a) For non-void function, the CDT is the return type.
11256   //   b) If the function has any non-uniform, non-linear parameters, then the
11257   //   CDT is the type of the first such parameter.
11258   //   c) If the CDT determined by a) or b) above is struct, union, or class
11259   //   type which is pass-by-value (except for the type that maps to the
11260   //   built-in complex data type), the characteristic data type is int.
11261   //   d) If none of the above three cases is applicable, the CDT is int.
11262   // The VLEN is then determined based on the CDT and the size of vector
11263   // register of that ISA for which current vector version is generated. The
11264   // VLEN is computed using the formula below:
11265   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11266   // where vector register size specified in section 3.2.1 Registers and the
11267   // Stack Frame of original AMD64 ABI document.
11268   QualType RetType = FD->getReturnType();
11269   if (RetType.isNull())
11270     return 0;
11271   ASTContext &C = FD->getASTContext();
11272   QualType CDT;
11273   if (!RetType.isNull() && !RetType->isVoidType()) {
11274     CDT = RetType;
11275   } else {
11276     unsigned Offset = 0;
11277     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11278       if (ParamAttrs[Offset].Kind == Vector)
11279         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11280       ++Offset;
11281     }
11282     if (CDT.isNull()) {
11283       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11284         if (ParamAttrs[I + Offset].Kind == Vector) {
11285           CDT = FD->getParamDecl(I)->getType();
11286           break;
11287         }
11288       }
11289     }
11290   }
11291   if (CDT.isNull())
11292     CDT = C.IntTy;
11293   CDT = CDT->getCanonicalTypeUnqualified();
11294   if (CDT->isRecordType() || CDT->isUnionType())
11295     CDT = C.IntTy;
11296   return C.getTypeSize(CDT);
11297 }
11298 
11299 static void
11300 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11301                            const llvm::APSInt &VLENVal,
11302                            ArrayRef<ParamAttrTy> ParamAttrs,
11303                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11304   struct ISADataTy {
11305     char ISA;
11306     unsigned VecRegSize;
11307   };
11308   ISADataTy ISAData[] = {
11309       {
11310           'b', 128
11311       }, // SSE
11312       {
11313           'c', 256
11314       }, // AVX
11315       {
11316           'd', 256
11317       }, // AVX2
11318       {
11319           'e', 512
11320       }, // AVX512
11321   };
11322   llvm::SmallVector<char, 2> Masked;
11323   switch (State) {
11324   case OMPDeclareSimdDeclAttr::BS_Undefined:
11325     Masked.push_back('N');
11326     Masked.push_back('M');
11327     break;
11328   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11329     Masked.push_back('N');
11330     break;
11331   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11332     Masked.push_back('M');
11333     break;
11334   }
11335   for (char Mask : Masked) {
11336     for (const ISADataTy &Data : ISAData) {
11337       SmallString<256> Buffer;
11338       llvm::raw_svector_ostream Out(Buffer);
11339       Out << "_ZGV" << Data.ISA << Mask;
11340       if (!VLENVal) {
11341         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11342         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11343         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11344       } else {
11345         Out << VLENVal;
11346       }
11347       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11348         switch (ParamAttr.Kind){
11349         case LinearWithVarStride:
11350           Out << 's' << ParamAttr.StrideOrArg;
11351           break;
11352         case Linear:
11353           Out << 'l';
11354           if (ParamAttr.StrideOrArg != 1)
11355             Out << ParamAttr.StrideOrArg;
11356           break;
11357         case Uniform:
11358           Out << 'u';
11359           break;
11360         case Vector:
11361           Out << 'v';
11362           break;
11363         }
11364         if (!!ParamAttr.Alignment)
11365           Out << 'a' << ParamAttr.Alignment;
11366       }
11367       Out << '_' << Fn->getName();
11368       Fn->addFnAttr(Out.str());
11369     }
11370   }
11371 }
11372 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11378 
11379 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11380 ///
11381 /// TODO: Need to implement the behavior for reference marked with a
11382 /// var or no linear modifiers (1.b in the section). For this, we
11383 /// need to extend ParamKindTy to support the linear modifiers.
11384 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11385   QT = QT.getCanonicalType();
11386 
11387   if (QT->isVoidType())
11388     return false;
11389 
11390   if (Kind == ParamKindTy::Uniform)
11391     return false;
11392 
11393   if (Kind == ParamKindTy::Linear)
11394     return false;
11395 
11396   // TODO: Handle linear references with modifiers
11397 
11398   if (Kind == ParamKindTy::LinearWithVarStride)
11399     return false;
11400 
11401   return true;
11402 }
11403 
11404 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11405 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11406   QT = QT.getCanonicalType();
11407   unsigned Size = C.getTypeSize(QT);
11408 
11409   // Only scalars and complex within 16 bytes wide set PVB to true.
11410   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11411     return false;
11412 
11413   if (QT->isFloatingType())
11414     return true;
11415 
11416   if (QT->isIntegerType())
11417     return true;
11418 
11419   if (QT->isPointerType())
11420     return true;
11421 
11422   // TODO: Add support for complex types (section 3.1.2, item 2).
11423 
11424   return false;
11425 }
11426 
11427 /// Computes the lane size (LS) of a return type or of an input parameter,
11428 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11429 /// TODO: Add support for references, section 3.2.1, item 1.
11430 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11431   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11432     QualType PTy = QT.getCanonicalType()->getPointeeType();
11433     if (getAArch64PBV(PTy, C))
11434       return C.getTypeSize(PTy);
11435   }
11436   if (getAArch64PBV(QT, C))
11437     return C.getTypeSize(QT);
11438 
11439   return C.getTypeSize(C.getUIntPtrType());
11440 }
11441 
11442 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11443 // signature of the scalar function, as defined in 3.2.2 of the
11444 // AAVFABI.
11445 static std::tuple<unsigned, unsigned, bool>
11446 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11447   QualType RetType = FD->getReturnType().getCanonicalType();
11448 
11449   ASTContext &C = FD->getASTContext();
11450 
11451   bool OutputBecomesInput = false;
11452 
11453   llvm::SmallVector<unsigned, 8> Sizes;
11454   if (!RetType->isVoidType()) {
11455     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11456     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11457       OutputBecomesInput = true;
11458   }
11459   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11460     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11461     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11462   }
11463 
11464   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11465   // The LS of a function parameter / return value can only be a power
11466   // of 2, starting from 8 bits, up to 128.
11467   assert(std::all_of(Sizes.begin(), Sizes.end(),
11468                      [](unsigned Size) {
11469                        return Size == 8 || Size == 16 || Size == 32 ||
11470                               Size == 64 || Size == 128;
11471                      }) &&
11472          "Invalid size");
11473 
11474   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11475                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11476                          OutputBecomesInput);
11477 }
11478 
11479 /// Mangle the parameter part of the vector function name according to
11480 /// their OpenMP classification. The mangling function is defined in
11481 /// section 3.5 of the AAVFABI.
11482 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11483   SmallString<256> Buffer;
11484   llvm::raw_svector_ostream Out(Buffer);
11485   for (const auto &ParamAttr : ParamAttrs) {
11486     switch (ParamAttr.Kind) {
11487     case LinearWithVarStride:
11488       Out << "ls" << ParamAttr.StrideOrArg;
11489       break;
11490     case Linear:
11491       Out << 'l';
11492       // Don't print the step value if it is not present or if it is
11493       // equal to 1.
11494       if (ParamAttr.StrideOrArg != 1)
11495         Out << ParamAttr.StrideOrArg;
11496       break;
11497     case Uniform:
11498       Out << 'u';
11499       break;
11500     case Vector:
11501       Out << 'v';
11502       break;
11503     }
11504 
11505     if (!!ParamAttr.Alignment)
11506       Out << 'a' << ParamAttr.Alignment;
11507   }
11508 
11509   return std::string(Out.str());
11510 }
11511 
11512 // Function used to add the attribute. The parameter `VLEN` is
11513 // templated to allow the use of "x" when targeting scalable functions
11514 // for SVE.
11515 template <typename T>
11516 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11517                                  char ISA, StringRef ParSeq,
11518                                  StringRef MangledName, bool OutputBecomesInput,
11519                                  llvm::Function *Fn) {
11520   SmallString<256> Buffer;
11521   llvm::raw_svector_ostream Out(Buffer);
11522   Out << Prefix << ISA << LMask << VLEN;
11523   if (OutputBecomesInput)
11524     Out << "v";
11525   Out << ParSeq << "_" << MangledName;
11526   Fn->addFnAttr(Out.str());
11527 }
11528 
11529 // Helper function to generate the Advanced SIMD names depending on
11530 // the value of the NDS when simdlen is not present.
11531 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11532                                       StringRef Prefix, char ISA,
11533                                       StringRef ParSeq, StringRef MangledName,
11534                                       bool OutputBecomesInput,
11535                                       llvm::Function *Fn) {
11536   switch (NDS) {
11537   case 8:
11538     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11539                          OutputBecomesInput, Fn);
11540     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11541                          OutputBecomesInput, Fn);
11542     break;
11543   case 16:
11544     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11545                          OutputBecomesInput, Fn);
11546     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11547                          OutputBecomesInput, Fn);
11548     break;
11549   case 32:
11550     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11551                          OutputBecomesInput, Fn);
11552     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11553                          OutputBecomesInput, Fn);
11554     break;
11555   case 64:
11556   case 128:
11557     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11558                          OutputBecomesInput, Fn);
11559     break;
11560   default:
11561     llvm_unreachable("Scalar type is too wide.");
11562   }
11563 }
11564 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// Validates any user-provided 'simdlen' (warning + bail-out on invalid
/// values), then attaches one mangled vector-variant name per required
/// (mask, VLEN) combination to \p Fn.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);              // Narrowest data size.
  const unsigned WDS = std::get<1>(Data);              // Widest data size.
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable vectors mangle VLEN as "x".
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`. The VLENs themselves are derived from NDS.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11673 
11674 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11675                                               llvm::Function *Fn) {
11676   ASTContext &C = CGM.getContext();
11677   FD = FD->getMostRecentDecl();
11678   // Map params to their positions in function decl.
11679   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11680   if (isa<CXXMethodDecl>(FD))
11681     ParamPositions.try_emplace(FD, 0);
11682   unsigned ParamPos = ParamPositions.size();
11683   for (const ParmVarDecl *P : FD->parameters()) {
11684     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11685     ++ParamPos;
11686   }
11687   while (FD) {
11688     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11689       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11690       // Mark uniform parameters.
11691       for (const Expr *E : Attr->uniforms()) {
11692         E = E->IgnoreParenImpCasts();
11693         unsigned Pos;
11694         if (isa<CXXThisExpr>(E)) {
11695           Pos = ParamPositions[FD];
11696         } else {
11697           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11698                                 ->getCanonicalDecl();
11699           Pos = ParamPositions[PVD];
11700         }
11701         ParamAttrs[Pos].Kind = Uniform;
11702       }
11703       // Get alignment info.
11704       auto NI = Attr->alignments_begin();
11705       for (const Expr *E : Attr->aligneds()) {
11706         E = E->IgnoreParenImpCasts();
11707         unsigned Pos;
11708         QualType ParmTy;
11709         if (isa<CXXThisExpr>(E)) {
11710           Pos = ParamPositions[FD];
11711           ParmTy = E->getType();
11712         } else {
11713           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11714                                 ->getCanonicalDecl();
11715           Pos = ParamPositions[PVD];
11716           ParmTy = PVD->getType();
11717         }
11718         ParamAttrs[Pos].Alignment =
11719             (*NI)
11720                 ? (*NI)->EvaluateKnownConstInt(C)
11721                 : llvm::APSInt::getUnsigned(
11722                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11723                           .getQuantity());
11724         ++NI;
11725       }
11726       // Mark linear parameters.
11727       auto SI = Attr->steps_begin();
11728       auto MI = Attr->modifiers_begin();
11729       for (const Expr *E : Attr->linears()) {
11730         E = E->IgnoreParenImpCasts();
11731         unsigned Pos;
11732         // Rescaling factor needed to compute the linear parameter
11733         // value in the mangled name.
11734         unsigned PtrRescalingFactor = 1;
11735         if (isa<CXXThisExpr>(E)) {
11736           Pos = ParamPositions[FD];
11737         } else {
11738           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11739                                 ->getCanonicalDecl();
11740           Pos = ParamPositions[PVD];
11741           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11742             PtrRescalingFactor = CGM.getContext()
11743                                      .getTypeSizeInChars(P->getPointeeType())
11744                                      .getQuantity();
11745         }
11746         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11747         ParamAttr.Kind = Linear;
11748         // Assuming a stride of 1, for `linear` without modifiers.
11749         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11750         if (*SI) {
11751           Expr::EvalResult Result;
11752           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11753             if (const auto *DRE =
11754                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11755               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11756                 ParamAttr.Kind = LinearWithVarStride;
11757                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11758                     ParamPositions[StridePVD->getCanonicalDecl()]);
11759               }
11760             }
11761           } else {
11762             ParamAttr.StrideOrArg = Result.Val.getInt();
11763           }
11764         }
11765         // If we are using a linear clause on a pointer, we need to
11766         // rescale the value of linear_step with the byte size of the
11767         // pointee type.
11768         if (Linear == ParamAttr.Kind)
11769           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11770         ++SI;
11771         ++MI;
11772       }
11773       llvm::APSInt VLENVal;
11774       SourceLocation ExprLoc;
11775       const Expr *VLENExpr = Attr->getSimdlen();
11776       if (VLENExpr) {
11777         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11778         ExprLoc = VLENExpr->getExprLoc();
11779       }
11780       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11781       if (CGM.getTriple().isX86()) {
11782         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11783       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11784         unsigned VLEN = VLENVal.getExtValue();
11785         StringRef MangledName = Fn->getName();
11786         if (CGM.getTarget().hasFeature("sve"))
11787           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11788                                          MangledName, 's', 128, Fn, ExprLoc);
11789         if (CGM.getTarget().hasFeature("neon"))
11790           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11791                                          MangledName, 'n', 128, Fn, ExprLoc);
11792       }
11793     }
11794     FD = FD->getPreviousDecl();
11795   }
11796 }
11797 
11798 namespace {
11799 /// Cleanup action for doacross support.
11800 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11801 public:
11802   static const int DoacrossFinArgs = 2;
11803 
11804 private:
11805   llvm::FunctionCallee RTLFn;
11806   llvm::Value *Args[DoacrossFinArgs];
11807 
11808 public:
11809   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11810                     ArrayRef<llvm::Value *> CallArgs)
11811       : RTLFn(RTLFn) {
11812     assert(CallArgs.size() == DoacrossFinArgs);
11813     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11814   }
11815   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11816     if (!CGF.HaveInsertPoint())
11817       return;
11818     CGF.EmitRuntimeCall(RTLFn, Args);
11819   }
11820 };
11821 } // namespace
11822 
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  // Emits the __kmpc_doacross_init call that sets up cross-iteration
  // dependency tracking for an ordered(n) loop nest, and registers a cleanup
  // that emits the matching __kmpc_doacross_fini.
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Build (once, cached in KmpDimTy) the kmp_dim record type used to
  // describe each loop dimension to the runtime.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialized kmp_dim array: 'lo' stays 0 for every dimension.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Push a cleanup so __kmpc_doacross_fini is emitted on every exit path
  // (normal or exceptional) from the region.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
11893 
11894 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11895                                           const OMPDependClause *C) {
11896   QualType Int64Ty =
11897       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11898   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11899   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11900       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11901   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11902   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11903     const Expr *CounterVal = C->getLoopData(I);
11904     assert(CounterVal);
11905     llvm::Value *CntVal = CGF.EmitScalarConversion(
11906         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11907         CounterVal->getExprLoc());
11908     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11909                           /*Volatile=*/false, Int64Ty);
11910   }
11911   llvm::Value *Args[] = {
11912       emitUpdateLocation(CGF, C->getBeginLoc()),
11913       getThreadID(CGF, C->getBeginLoc()),
11914       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11915   llvm::FunctionCallee RTLFn;
11916   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11917     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11918                                                   OMPRTL___kmpc_doacross_post);
11919   } else {
11920     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11921     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11922                                                   OMPRTL___kmpc_doacross_wait);
11923   }
11924   CGF.EmitRuntimeCall(RTLFn, Args);
11925 }
11926 
11927 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11928                                llvm::FunctionCallee Callee,
11929                                ArrayRef<llvm::Value *> Args) const {
11930   assert(Loc.isValid() && "Outlined function call location must be valid.");
11931   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11932 
11933   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11934     if (Fn->doesNotThrow()) {
11935       CGF.EmitNounwindRuntimeCall(Fn, Args);
11936       return;
11937     }
11938   }
11939   CGF.EmitRuntimeCall(Callee, Args);
11940 }
11941 
// Default implementation: calling an outlined function is a plain call via
// emitCall; device runtimes may override this to adjust the convention.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11947 
11948 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11949   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11950     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11951       HasEmittedDeclareTargetRegion = true;
11952 }
11953 
// Default implementation: the native and target parameters share the same
// local storage, so just return the native parameter's address. Device
// runtimes with distinct parameter translation may override this.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11959 
// Return the address to use for local variable VD, taking into account both
// untied-task duplication and the 'allocate' directive. For a variable with
// an OMPAllocateDeclAttr and a non-default allocator, memory is obtained via
// __kmpc_alloc and released through an EH-stack cleanup calling __kmpc_free;
// otherwise the untied-task address (or an invalid Address) is returned.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  // Addresses recorded for this variable if the current function is an
  // untied task: first = address slot, second = real data address.
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA: the size is only known at runtime, so round it up to the
      // alignment with IR arithmetic.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Statically-sized type: compute the aligned size at compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // void *addr = __kmpc_alloc(tid, size, allocator);
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // In an untied task, publish the allocated pointer into the task's
    // address slot so later task parts can find it.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      // Emits: __kmpc_free(tid, addr, allocator)
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // Prefer the untied task's "real" address when present; otherwise wrap
    // the freshly allocated pointer with the declared alignment.
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    // For untied tasks, emit the switch that resumes at the next task part.
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12063 
12064 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12065                                              const VarDecl *VD) const {
12066   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12067   if (It == FunctionToUntiedTaskStackMap.end())
12068     return false;
12069   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12070 }
12071 
// Pushes a set with all variables named in the loop directive's
// 'nontemporal' clauses onto the runtime's nontemporal-decls stack; the
// matching destructor pops it. No-op if the directive has no such clauses.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.")
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        // Plain variable reference.
        VD = DRE->getDecl();
      } else {
        // Otherwise the reference must be a member of the current class
        // accessed through (implicit) 'this'.
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}
12097 
12098 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12099   if (!NeedToPush)
12100     return;
12101   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12102 }
12103 
12104 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12105     CodeGenFunction &CGF,
12106     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12107                           std::pair<Address, Address>> &LocalVars)
12108     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12109   if (!NeedToPush)
12110     return;
12111   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12112       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12113   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12114 }
12115 
12116 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12117   if (!NeedToPush)
12118     return;
12119   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12120 }
12121 
12122 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12123   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12124 
12125   return llvm::any_of(
12126       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12127       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
12128 }
12129 
12130 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12131     const OMPExecutableDirective &S,
12132     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12133     const {
12134   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12135   // Vars in target/task regions must be excluded completely.
12136   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12137       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12138     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12139     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12140     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12141     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12142       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12143         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12144     }
12145   }
12146   // Exclude vars in private clauses.
12147   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12148     for (const Expr *Ref : C->varlists()) {
12149       if (!Ref->getType()->isScalarType())
12150         continue;
12151       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12152       if (!DRE)
12153         continue;
12154       NeedToCheckForLPCs.insert(DRE->getDecl());
12155     }
12156   }
12157   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12158     for (const Expr *Ref : C->varlists()) {
12159       if (!Ref->getType()->isScalarType())
12160         continue;
12161       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12162       if (!DRE)
12163         continue;
12164       NeedToCheckForLPCs.insert(DRE->getDecl());
12165     }
12166   }
12167   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12168     for (const Expr *Ref : C->varlists()) {
12169       if (!Ref->getType()->isScalarType())
12170         continue;
12171       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12172       if (!DRE)
12173         continue;
12174       NeedToCheckForLPCs.insert(DRE->getDecl());
12175     }
12176   }
12177   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12178     for (const Expr *Ref : C->varlists()) {
12179       if (!Ref->getType()->isScalarType())
12180         continue;
12181       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12182       if (!DRE)
12183         continue;
12184       NeedToCheckForLPCs.insert(DRE->getDecl());
12185     }
12186   }
12187   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12188     for (const Expr *Ref : C->varlists()) {
12189       if (!Ref->getType()->isScalarType())
12190         continue;
12191       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12192       if (!DRE)
12193         continue;
12194       NeedToCheckForLPCs.insert(DRE->getDecl());
12195     }
12196   }
12197   for (const Decl *VD : NeedToCheckForLPCs) {
12198     for (const LastprivateConditionalData &Data :
12199          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12200       if (Data.DeclToUniqueName.count(VD) > 0) {
12201         if (!Data.Disabled)
12202           NeedToAddForLPCsAsDisabled.insert(VD);
12203         break;
12204       }
12205     }
12206   }
12207 }
12208 
// "Push" form of the RAII: if the directive (OpenMP >= 5.0) has any
// 'lastprivate(conditional: ...)' clause, push a record mapping each listed
// declaration to a unique global name, together with the loop IV lvalue and
// the current function; otherwise do nothing.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    // Only 'conditional' lastprivates participate.
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12240 
// "Disable" form of the RAII (used via the disable() factory): if directive
// S privatizes variables tracked by an enabled lastprivate-conditional
// record, push a Disabled record listing them so inner analysis skips them.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    // Names are irrelevant for disabled entries; only membership matters.
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
12259 
// Factory returning an RAII object that disables lastprivate-conditional
// analysis for variables privatized by directive S (see the two-argument
// constructor above for the push logic).
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12265 
12266 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12267   if (CGM.getLangOpts().OpenMP < 50)
12268     return;
12269   if (Action == ActionToDo::DisableLastprivateConditional) {
12270     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12271            "Expected list of disabled private vars.");
12272     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12273   }
12274   if (Action == ActionToDo::PushAsLastprivateConditional) {
12275     assert(
12276         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12277         "Expected list of lastprivate conditional vars.");
12278     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12279   }
12280 }
12281 
// Creates (or reuses) the per-function private copy of a lastprivate
// conditional variable. The copy is a record { value; char Fired; }: the
// value field holds the private data, Fired flags whether an inner region
// updated it. Resets Fired to 0 and returns the address of the value field.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use in this function: build the record type and a stack
    // temporary for it, then cache everything for later lookups.
    // NOTE(review): "lasprivate" is a long-standing misspelling of
    // "lastprivate" in this internal record name; other comments refer to
    // "struct.lastprivate.conditional" — confirm before renaming.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Already registered: unpack the cached (type, fields, base lvalue).
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0;
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12316 
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  // Stack of lastprivate-conditional records (outermost first).
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  // Populated on a successful match; retrieved via getFoundData().
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  // Matches a reference to a tracked variable. Records are scanned from the
  // innermost region outward; hitting a Disabled record that mentions the
  // decl stops the search with no match.
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  // Same as above, but for tracked members of the current class accessed
  // through (wrapped) 'this'.
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  // Generic traversal: recurse only into glvalue children (a reference must
  // be a glvalue to name a variable) and stop at the first match.
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  // Returns (expr, canonical decl, unique name, IV lvalue, owning function)
  // of the match, or null/empty values if nothing matched.
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
12387 
// Emits the "keep the value from the highest iteration" update for a
// lastprivate conditional variable: two internal globals (last seen IV and
// last value) are updated from the private copy whenever the current IV is
// >= the stored one. The update runs under a critical section unless we are
// in simd-only mode, where no parallelism is possible.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12474 
12475 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12476                                                          const Expr *LHS) {
12477   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12478     return;
12479   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12480   if (!Checker.Visit(LHS))
12481     return;
12482   const Expr *FoundE;
12483   const Decl *FoundD;
12484   StringRef UniqueDeclName;
12485   LValue IVLVal;
12486   llvm::Function *FoundFn;
12487   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12488       Checker.getFoundData();
12489   if (FoundFn != CGF.CurFn) {
12490     // Special codegen for inner parallel regions.
12491     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12492     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12493     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12494            "Lastprivate conditional is not found in outer region.");
12495     QualType StructTy = std::get<0>(It->getSecond());
12496     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12497     LValue PrivLVal = CGF.EmitLValue(FoundE);
12498     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12499         PrivLVal.getAddress(CGF),
12500         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12501     LValue BaseLVal =
12502         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12503     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12504     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12505                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12506                         FiredLVal, llvm::AtomicOrdering::Unordered,
12507                         /*IsVolatile=*/true, /*isInit=*/false);
12508     return;
12509   }
12510 
12511   // Private address of the lastprivate conditional in the current context.
12512   // priv_a
12513   LValue LVal = CGF.EmitLValue(FoundE);
12514   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12515                                    FoundE->getExprLoc());
12516 }
12517 
12518 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12519     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12520     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12521   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12522     return;
12523   auto Range = llvm::reverse(LastprivateConditionalStack);
12524   auto It = llvm::find_if(
12525       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12526   if (It == Range.end() || It->Fn != CGF.CurFn)
12527     return;
12528   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12529   assert(LPCI != LastprivateConditionalToTypes.end() &&
12530          "Lastprivates must be registered already.");
12531   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12532   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12533   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12534   for (const auto &Pair : It->DeclToUniqueName) {
12535     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12536     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12537       continue;
12538     auto I = LPCI->getSecond().find(Pair.first);
12539     assert(I != LPCI->getSecond().end() &&
12540            "Lastprivate must be rehistered already.");
12541     // bool Cmp = priv_a.Fired != 0;
12542     LValue BaseLVal = std::get<3>(I->getSecond());
12543     LValue FiredLVal =
12544         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12545     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12546     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12547     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12548     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12549     // if (Cmp) {
12550     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12551     CGF.EmitBlock(ThenBB);
12552     Address Addr = CGF.GetAddrOfLocalVar(VD);
12553     LValue LVal;
12554     if (VD->getType()->isReferenceType())
12555       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12556                                            AlignmentSource::Decl);
12557     else
12558       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12559                                 AlignmentSource::Decl);
12560     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12561                                      D.getBeginLoc());
12562     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12563     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12564     // }
12565   }
12566 }
12567 
// Final step for a lastprivate conditional variable: copy the winning value
// from its tracking global (named by the variable's unique name) back into
// the private copy PrivLVal. If the global was never created, no assignment
// occurred inside the region and there is nothing to do.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
12586 
// SIMD-only (-fopenmp-simd) mode: 'parallel' outlining needs the full
// OpenMP runtime, so reaching this is a front-end invariant violation.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12592 
// SIMD-only mode: 'teams' outlining is never emitted.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12598 
// SIMD-only mode: task outlining is never emitted.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12606 
// SIMD-only mode: parallel calls are never emitted.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12614 
// SIMD-only mode: 'critical' regions are never emitted.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12621 
// SIMD-only mode: 'master' regions are never emitted.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12627 
// SIMD-only mode: 'masked' regions are never emitted.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12634 
// SIMD-only mode: 'taskyield' calls are never emitted.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12639 
// SIMD-only mode: 'taskgroup' regions are never emitted.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12645 
// SIMD-only mode: 'single' regions (incl. copyprivate) are never emitted.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12653 
// SIMD-only mode: 'ordered' regions are never emitted.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12660 
// Intentionally unimplemented: barriers require thread-team runtime support,
// which the SIMD-only runtime does not provide.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12668 
// Intentionally unimplemented: dynamic-dispatch worksharing-loop init is not
// supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12675 
// Intentionally unimplemented: static worksharing-loop init is not supported
// in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12681 
// Intentionally unimplemented: 'distribute' static init is not supported in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12687 
// Intentionally unimplemented: ordered-iteration end calls are not supported
// in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12694 
// Intentionally unimplemented: static worksharing-loop finish is not
// supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12700 
// Intentionally unimplemented: dynamic-loop "next chunk" queries are not
// supported in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12708 
// Intentionally unimplemented: 'num_threads' has no meaning without a
// threading runtime; not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12714 
// Intentionally unimplemented: 'proc_bind' is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12720 
// Intentionally unimplemented: 'threadprivate' variable access is not
// supported in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12727 
// Intentionally unimplemented: 'threadprivate' variable definitions are not
// supported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12733 
// Intentionally unimplemented: artificial threadprivate storage is not
// supported in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12738 
// Intentionally unimplemented: 'flush' is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12745 
// Intentionally unimplemented: 'task' codegen requires the tasking runtime,
// which the SIMD-only runtime does not provide.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12754 
// Intentionally unimplemented: 'taskloop' is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12761 
// Reductions ARE supported in SIMD-only mode, but only the "simple" form that
// needs no cross-thread runtime calls (Options.SimpleReduction). The base
// class handles that form entirely inline, so delegate to it; any other
// reduction reaching here indicates a front-end/sema bug, caught by the
// assert.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  // Fall back to the full runtime's inline simple-reduction path.
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12770 
// Intentionally unimplemented: task reductions are not supported in SIMD-only
// mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12776 
// Intentionally unimplemented: task-reduction finalization is not supported
// in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12782 
// Intentionally unimplemented: task-reduction fixups are not supported in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12789 
// Intentionally unimplemented: task-reduction item lookup is not supported in
// SIMD-only mode.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12796 
// Intentionally unimplemented: 'taskwait' is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12801 
// Intentionally unimplemented: 'cancellation point' is not supported in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12807 
// Intentionally unimplemented: 'cancel' is not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12813 
// Intentionally unimplemented: 'target' region outlining requires offloading
// support, which the SIMD-only runtime does not provide.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12820 
// Intentionally unimplemented: 'target' kernel launches are not supported in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12830 
// Intentionally unimplemented: device-side function emission is not supported
// in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12834 
// Intentionally unimplemented: device-side global-variable emission is not
// supported in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12838 
// Always returns false: this runtime never claims a global for target-specific
// emission, so the caller proceeds with normal host codegen for GD. Unlike
// the other target hooks above, this one is reachable and must not assert.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
12842 
// Intentionally unimplemented: 'teams' regions are not supported in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12850 
// Intentionally unimplemented: 'num_teams'/'thread_limit' clauses are not
// supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12857 
// Intentionally unimplemented: 'target data' mapping is not supported in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12863 
// Intentionally unimplemented: stand-alone target-data directives
// (enter/exit data, update) are not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12869 
// Intentionally unimplemented: doacross (ordered-depend) initialization is
// not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12875 
// Intentionally unimplemented: doacross ordered-depend codegen is not
// supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12880 
// Intentionally unimplemented: outlined-function parameter translation is not
// supported in SIMD-only mode.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12886 
// Intentionally unimplemented: translated-parameter address lookup is not
// supported in SIMD-only mode.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12893