1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/APValue.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/StmtOpenMP.h"
23 #include "clang/AST/StmtVisitor.h"
24 #include "clang/Basic/BitmaskEnum.h"
25 #include "clang/Basic/FileManager.h"
26 #include "clang/Basic/OpenMPKinds.h"
27 #include "clang/Basic/SourceManager.h"
28 #include "clang/CodeGen/ConstantInitBuilder.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/SetOperations.h"
31 #include "llvm/ADT/StringExtras.h"
32 #include "llvm/Bitcode/BitcodeReader.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info backed by a captured statement (used for outlined
  /// regions such as 'parallel', 'task' and 'target').
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info without a captured statement of its own (used for
  /// inlined regions).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for untied tasks. No-op by default;
  /// overridden by the task-outlined region info.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Get the kind of this region (parallel/task/inlined/target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Get the OpenMP directive this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the region was created with a 'cancel' construct inside.
  bool hasCancel() const { return HasCancel; }

  /// RTTI support: every OpenMP region info carries the CR_OpenMP kind.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of this region.
  CGOpenMPRegionKind RegionKind;
  /// Callback that emits the body of the region.
  RegionCodeGenTy CodeGen;
  /// Directive that created this region.
  OpenMPDirectiveKind Kind;
  /// Whether a 'cancel' construct is present in the region.
  bool HasCancel;
};
109 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Parameter of the outlined function that carries the
  /// global thread id; must not be null.
  /// \param HelperName Name to use for the generated capture helper.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the generated capture helper function.
  StringRef HelperName;
};
142 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Action that emits the resume/suspend machinery for untied tasks.
  /// The task body is split into "parts"; which part to resume at is kept
  /// in the storage pointed to by \p PartIDVar and dispatched through a
  /// switch instruction built lazily in Enter().
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True when the task is untied (note: the constructor takes 'Tied').
    bool Untied;
    /// Parameter holding a pointer to the current part id of the task.
    const VarDecl *PartIDVar;
    /// Extra codegen callback invoked at each suspension point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch dispatching to the task part recorded through PartIDVar.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    /// For untied tasks, emit the entry dispatch: load the part id and
    /// switch on it. The default destination exits the function through the
    /// cleanups; case 0 resumes at the start of the task body.
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit a suspension point: store the id of the next part into
    /// PartIDVar, run the untied codegen callback, branch to the function
    /// exit, and register a new switch case that resumes execution right
    /// after this point.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The resume id equals the current number of switch cases, i.e. the
        // index of the case added below.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Delegate untied-task switching to the associated action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
231 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are delegated to the enclosing (outer) region
/// info when one exists.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI Captured-statement info that was active before this
  /// inlined region was entered; restored by the caller when the region ends.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region,no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    // The local here shadows the OuterRegionInfo member: unlike the other
    // delegations above, any old CSI (not just an OpenMP region info) may
    // provide the helper name.
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Get the captured-statement info that was active before this region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI cast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
314 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param HelperName Unique name of the target region, supplied by the
  /// client.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided unique name for the target region.
  StringRef HelperName;
};
343 
/// Placeholder codegen callback for regions that only capture expressions
/// and therefore never emit a body; reaching it is a bug.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      // Local variables and parameters need no privatization here.
      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      // Build a reference to the captured variable and privatize it to the
      // address that the reference evaluates to.
      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    // NOTE(review): behaviorally equivalent to returning the base lookup
    // result directly; kept to make the "not captured here" case explicit.
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
406 
/// RAII for emitting code of OpenMP constructs. Installs a
/// CGOpenMPInlinedRegionInfo on construction and restores the previous
/// captured-statement info on destruction.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture mappings, restored on destruction when
  /// NoInheritance is set.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved lambda 'this' capture field.
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block info.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  /// If true, the inlined region does not inherit the enclosing
  /// lambda/block capture state.
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Stash and clear the lambda/block capture state so the inlined
      // region does not see it.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      // Restore the lambda/block capture state saved in the constructor.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
449 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL, matching kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
478 
namespace {
// Enable bitmask operators for the flag enums declared in this namespace.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device ids for OpenMP offloading.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
504 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
545 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule maps to 'static'.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
577 
578 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
579 /// region.
580 class CleanupTy final : public EHScopeStack::Cleanup {
581   PrePostActionTy *Action;
582 
583 public:
584   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
585   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
586     if (!CGF.HaveInsertPoint())
587       return;
588     Action->Exit(CGF);
589   }
590 };
591 
592 } // anonymous namespace
593 
/// Run the stored codegen callback inside its own cleanups scope. When a
/// pre/post action is attached, it is pushed as a normal+EH cleanup so its
/// Exit() hook fires when the scope unwinds (see CleanupTy); otherwise the
/// callback gets a throwaway no-op action.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
604 
605 /// Check if the combiner is a call to UDR combiner and if it is so return the
606 /// UDR decl used for reduction.
607 static const OMPDeclareReductionDecl *
608 getReductionInit(const Expr *ReductionOp) {
609   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
610     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
611       if (const auto *DRE =
612               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
613         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
614           return DRD;
615   return nullptr;
616 }
617 
/// Emit initialization of a reduction private copy.
/// \param DRD User-defined reduction declaration.
/// \param InitOp Call expression invoking the UDR initializer (only emitted
/// when \p DRD has an explicit initializer).
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original (shared) variable.
/// \param Ty Type of the variable being initialized.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Emit a call to the UDR initializer: map its LHS/RHS arguments to the
    // private and original addresses, then emit InitOp with its opaque
    // callee bound to the initializer function.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Privatize the LHS decl to the private copy and the RHS decl to the
    // original variable for the duration of the call emission.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No explicit initializer: materialize the null value of Ty in a private
    // constant global and copy it into the private storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied to Private directly from the global's lvalue.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    // Scalar/complex: store the loaded null value into the private copy.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
673 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit true if \p Init must be emitted through
/// emitInitWithReductionInitializer (UDR case).
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration the init belongs to, or null.
/// \param SrcAddr Address of the original array (only used when \p DRD is
/// non-null).
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // When copying from a source array (UDR case), track the current source
  // element with a PHI advancing in lock-step with the destination.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name below says "dest.element" although it
    // advances the source pointer; cosmetic only.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
762 
763 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
764   return CGF.EmitOMPSharedLValue(E);
765 }
766 
767 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
768                                             const Expr *E) {
769   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
770     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
771   return LValue();
772 }
773 
774 void ReductionCodeGen::emitAggregateInitialization(
775     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
776     const OMPDeclareReductionDecl *DRD) {
777   // Emit VarDecl with copy init for arrays.
778   // Get the address of the original variable captured in current
779   // captured region.
780   const auto *PrivateVD =
781       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
782   bool EmitDeclareReductionInit =
783       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
784   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
785                        EmitDeclareReductionInit,
786                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
787                                                 : PrivateVD->getInit(),
788                        DRD, SharedLVal.getAddress(CGF));
789 }
790 
791 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
792                                    ArrayRef<const Expr *> Origs,
793                                    ArrayRef<const Expr *> Privates,
794                                    ArrayRef<const Expr *> ReductionOps) {
795   ClausesData.reserve(Shareds.size());
796   SharedAddresses.reserve(Shareds.size());
797   Sizes.reserve(Shareds.size());
798   BaseDecls.reserve(Shareds.size());
799   const auto *IOrig = Origs.begin();
800   const auto *IPriv = Privates.begin();
801   const auto *IRed = ReductionOps.begin();
802   for (const Expr *Ref : Shareds) {
803     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
804     std::advance(IOrig, 1);
805     std::advance(IPriv, 1);
806     std::advance(IRed, 1);
807   }
808 }
809 
810 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
811   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
812          "Number of generated lvalues must be exactly N.");
813   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
814   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
815   SharedAddresses.emplace_back(First, Second);
816   if (ClausesData[N].Shared == ClausesData[N].Ref) {
817     OrigAddresses.emplace_back(First, Second);
818   } else {
819     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
820     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
821     OrigAddresses.emplace_back(First, Second);
822   }
823 }
824 
// Computes and records (in Sizes[N]) the size of reduction item N: the byte
// size always, plus the element count for variably-modified (VLA) types.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Fixed-size type: record only the byte size; no element count needed.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: elements = (UB - LB) + 1, bytes = elements * sizeof.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole VLA: bytes come from the type size, elements = bytes / sizeof.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA's opaque size expression to the element count just computed
  // so EmitVariablyModifiedType can evaluate the private type's bounds.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
861 
862 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
863                                          llvm::Value *Size) {
864   const auto *PrivateVD =
865       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
866   QualType PrivateType = PrivateVD->getType();
867   if (!PrivateType->isVariablyModifiedType()) {
868     assert(!Size && !Sizes[N].second &&
869            "Size should be nullptr for non-variably modified reduction "
870            "items.");
871     return;
872   }
873   CodeGenFunction::OpaqueValueMapping OpaqueMap(
874       CGF,
875       cast<OpaqueValueExpr>(
876           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
877       RValue::get(Size));
878   CGF.EmitVariablyModifiedType(PrivateType);
879 }
880 
// Emits the initial value of the private copy of reduction item N using, in
// priority order: element-wise init for arrays, the user-defined 'declare
// reduction' initializer, or the DefaultInit callback / the private
// variable's own initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Retype both addresses to the memory representation of the private and
  // shared item types before running any initializer.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array items are initialized element-wise.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar item with a user-defined reduction initializer.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer when
    // the DefaultInit callback did not handle initialization itself.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
914 
915 bool ReductionCodeGen::needCleanups(unsigned N) {
916   const auto *PrivateVD =
917       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
918   QualType PrivateType = PrivateVD->getType();
919   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
920   return DTorKind != QualType::DK_none;
921 }
922 
923 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
924                                     Address PrivateAddr) {
925   const auto *PrivateVD =
926       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
927   QualType PrivateType = PrivateVD->getType();
928   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
929   if (needCleanups(N)) {
930     PrivateAddr = CGF.Builder.CreateElementBitCast(
931         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
932     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
933   }
934 }
935 
// Walks pointer/reference indirections of BaseTy, loading through each level,
// until the element type ElTy is reached; returns an lvalue whose address is
// re-cast to ElTy's memory representation.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Strip one level of indirection, using the appropriate load for
    // pointers vs. references.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // NOTE(review): the returned lvalue keeps BaseLV's type while the address
  // is cast to ElTy's memory type — callers appear to use only the
  // address/alignment; confirm before relying on getType() of the result.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
955 
// Rebuilds a chain of stack temporaries mirroring BaseTy's indirection
// levels so that Addr (the adjusted data pointer) can be dereferenced the
// same way the original base lvalue was. Returns the outermost temporary,
// or Addr itself (with BaseLVAlignment) when no indirection is involved.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One temporary per indirection level; each outer temporary stores the
    // address of the next inner one.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the data pointer into the innermost temporary and return the
    // outermost one, which now behaves like the original base variable.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
983 
984 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
985   const VarDecl *OrigVD = nullptr;
986   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
987     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
988     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
989       Base = TempOASE->getBase()->IgnoreParenImpCasts();
990     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
991       Base = TempASE->getBase()->IgnoreParenImpCasts();
992     DE = cast<DeclRefExpr>(Base);
993     OrigVD = cast<VarDecl>(DE->getDecl());
994   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
995     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
996     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
997       Base = TempASE->getBase()->IgnoreParenImpCasts();
998     DE = cast<DeclRefExpr>(Base);
999     OrigVD = cast<VarDecl>(DE->getDecl());
1000   }
1001   return OrigVD;
1002 }
1003 
// For array-section/subscript reduction items, offsets the private copy by
// the same element distance the shared item has from its base declaration,
// so the private address can stand in for the original base expression.
// Records the base declaration of item N in BaseDecls in all cases.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Element distance between the base start and the shared item.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    // Wrap the adjusted pointer so it can be dereferenced like the original
    // base (re-creating any pointer/reference indirections).
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1029 
1030 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1031   const OMPDeclareReductionDecl *DRD =
1032       getReductionInit(ClausesData[N].ReductionOp);
1033   return DRD && DRD->getInitializer();
1034 }
1035 
1036 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1037   return CGF.EmitLoadOfPointerLValue(
1038       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1039       getThreadIDVariable()->getType()->castAs<PointerType>());
1040 }
1041 
// Emits the body of an OpenMP region inside a terminate scope: exceptions
// must not unwind out of a structured block, so any escaping exception
// terminates instead.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1056 
1057 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1058     CodeGenFunction &CGF) {
1059   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1060                             getThreadIDVariable()->getType(),
1061                             AlignmentSource::Decl);
1062 }
1063 
1064 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1065                                        QualType FieldTy) {
1066   auto *Field = FieldDecl::Create(
1067       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1068       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1069       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1070   Field->setAccess(AS_public);
1071   DC->addDecl(Field);
1072   return Field;
1073 }
1074 
// Constructor: records the runtime-name separators, creates the
// kmp_critical_name lock type (an array of 8 i32), initializes the
// OpenMPIRBuilder's cached types, and loads offload info metadata
// (see loadOffloadInfoMetadata).
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1085 
1086 void CGOpenMPRuntime::clear() {
1087   InternalVars.clear();
1088   // Clean non-target variable declarations possibly used only in debug info.
1089   for (const auto &Data : EmittedNonTargetVariables) {
1090     if (!Data.getValue().pointsToAliveValue())
1091       continue;
1092     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1093     if (!GV)
1094       continue;
1095     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1096       continue;
1097     GV->eraseFromParent();
1098   }
1099 }
1100 
1101 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1102   SmallString<128> Buffer;
1103   llvm::raw_svector_ostream OS(Buffer);
1104   StringRef Sep = FirstSeparator;
1105   for (StringRef Part : Parts) {
1106     OS << Sep << Part;
1107     Sep = Separator;
1108   }
1109   return std::string(OS.str());
1110 }
1111 
// Emits the outlined function for a 'declare reduction' combiner or
// initializer:
//   void .omp_combiner.(Ty *omp_out, Ty *omp_in);
// The In/Out variable declarations of the directive are privatized to the
// dereferenced parameters so the combiner/initializer expression can refer
// to them directly.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny; force inlining when optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers without an explicit init expression, run the priv
  // variable's own non-trivial initializer first.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1168 
// Emits (once) the combiner and, if present, the initializer function for a
// 'declare reduction' declaration, caching them in UDRMap. When emitted from
// inside a function, the declaration is also tracked in FunctionUDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // For direct-init ('omp_priv = expr') the initializer body is emitted
    // from the priv variable's own init, so no expression is passed here.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1194 
1195 std::pair<llvm::Function *, llvm::Function *>
1196 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1197   auto I = UDRMap.find(D);
1198   if (I != UDRMap.end())
1199     return I->second;
1200   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1201   return UDRMap.lookup(D);
1202 }
1203 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present: pushes a FinalizationInfo in the constructor and
// pops it in the destructor, so cancellation inside the region can branch
// through clang's cleanup machinery.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    // No-op when there is no OpenMPIRBuilder to notify.
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    // Pop only if the constructor pushed (i.e. a builder was provided).
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1248 
// Outlines the body of a 'parallel'/'teams'-style region into a function
// named via OutlinedHelperName, determining first whether the directive (or
// any of its combined 'parallel for'-style forms) contains a cancel
// construct so cancellation barriers are wired correctly.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Each directive kind exposes hasCancel() on its own class, hence the
  // exhaustive dyn_cast chain over all parallel-containing directives.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
1285 
1286 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1287     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1288     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1289   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1290   return emitParallelOrTeamsOutlinedFunction(
1291       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1292 }
1293 
1294 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1295     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1296     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1297   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1298   return emitParallelOrTeamsOutlinedFunction(
1299       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1300 }
1301 
// Outlines the body of a task/taskloop region. For untied tasks an action is
// attached that re-enqueues the task via __kmpc_omp_task at scheduling
// points; the number of generated task parts is reported via NumberOfParts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Codegen emitted at untied-task scheduling points: re-enqueue the task
  // descriptor with __kmpc_omp_task.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Each task-like directive kind exposes hasCancel() on its own class.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // Untied tasks may be split into multiple parts; report the count.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1348 
1349 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1350                              const RecordDecl *RD, const CGRecordLayout &RL,
1351                              ArrayRef<llvm::Constant *> Data) {
1352   llvm::StructType *StructTy = RL.getLLVMType();
1353   unsigned PrevIdx = 0;
1354   ConstantInitBuilder CIBuilder(CGM);
1355   auto DI = Data.begin();
1356   for (const FieldDecl *FD : RD->fields()) {
1357     unsigned Idx = RL.getLLVMFieldNo(FD);
1358     // Fill the alignment.
1359     for (unsigned I = PrevIdx; I < Idx; ++I)
1360       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1361     PrevIdx = Idx + 1;
1362     Fields.add(*DI);
1363     ++DI;
1364   }
1365 }
1366 
1367 template <class... As>
1368 static llvm::GlobalVariable *
1369 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1370                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1371                    As &&... Args) {
1372   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1373   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1374   ConstantInitBuilder CIBuilder(CGM);
1375   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1376   buildStructValue(Fields, CGM, RD, RL, Data);
1377   return Fields.finishAndCreateGlobal(
1378       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1379       std::forward<As>(Args)...);
1380 }
1381 
1382 template <typename T>
1383 static void
1384 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1385                                          ArrayRef<llvm::Constant *> Data,
1386                                          T &Parent) {
1387   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1388   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1389   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1390   buildStructValue(Fields, CGM, RD, RL, Data);
1391   Fields.finishAndAddTo(Parent);
1392 }
1393 
// Creates a placeholder instruction (a no-op bitcast of undef) that marks
// where ident/thread-id service calls are inserted for the current function:
// either at the current builder position, or right after the allocas.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    // Append the marker to the block currently being emitted.
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    // Place the marker immediately after the alloca insertion point so
    // service calls land in the entry block.
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1409 
1410 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1411   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1412   if (Elem.second.ServiceInsertPt) {
1413     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1414     Elem.second.ServiceInsertPt = nullptr;
1415     Ptr->eraseFromParent();
1416   }
1417 }
1418 
1419 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1420                                                   SourceLocation Loc,
1421                                                   SmallString<128> &Buffer) {
1422   llvm::raw_svector_ostream OS(Buffer);
1423   // Build debug location
1424   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1425   OS << ";" << PLoc.getFilename() << ";";
1426   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1427     OS << FD->getQualifiedNameAsString();
1428   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1429   return OS.str();
1430 }
1431 
1432 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1433                                                  SourceLocation Loc,
1434                                                  unsigned Flags) {
1435   llvm::Constant *SrcLocStr;
1436   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1437       Loc.isInvalid()) {
1438     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1439   } else {
1440     std::string FunctionName = "";
1441     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1442       FunctionName = FD->getQualifiedNameAsString();
1443     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1444     const char *FileName = PLoc.getFilename();
1445     unsigned Line = PLoc.getLine();
1446     unsigned Column = PLoc.getColumn();
1447     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1448                                                 Line, Column);
1449   }
1450   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1451   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1452                                      Reserved2Flags);
1453 }
1454 
/// Return the OpenMP thread id for the current function, caching it per
/// function where safe. The value either comes from an outlined region's
/// thread-id parameter or from a __kmpc_global_thread_num call emitted at the
/// function's service insertion point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Loading the parameter is only done when either no C++ EH landing pads
      // are required, or the load happens in (or its address lives in) a block
      // where the parameter is known to be initialized: the entry block or the
      // current insertion block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insertion point (entry block) so the cached
  // value dominates all later uses; restore the builder position afterwards.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1522 
1523 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1524   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1525   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1526     clearLocThreadIdInsertPt(CGF);
1527     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1528   }
1529   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1530     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1531       UDRMap.erase(D);
1532     FunctionUDRMap.erase(CGF.CurFn);
1533   }
1534   auto I = FunctionUDMMap.find(CGF.CurFn);
1535   if (I != FunctionUDMMap.end()) {
1536     for(const auto *D : I->second)
1537       UDMMap.erase(D);
1538     FunctionUDMMap.erase(I);
1539   }
1540   LastprivateConditionalToTypes.erase(CGF.CurFn);
1541   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1542 }
1543 
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  // Pointer to the runtime's ident_t source-location struct, as maintained by
  // the OpenMPIRBuilder.
  return OMPBuilder.IdentPtr;
}
1547 
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  // Lazily build and cache the microtask (outlined parallel body) type.
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
1557 
1558 llvm::FunctionCallee
1559 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1560   assert((IVSize == 32 || IVSize == 64) &&
1561          "IV size is not compatible with the omp runtime");
1562   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1563                                             : "__kmpc_for_static_init_4u")
1564                                 : (IVSigned ? "__kmpc_for_static_init_8"
1565                                             : "__kmpc_for_static_init_8u");
1566   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1567   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1568   llvm::Type *TypeParams[] = {
1569     getIdentTyPointerTy(),                     // loc
1570     CGM.Int32Ty,                               // tid
1571     CGM.Int32Ty,                               // schedtype
1572     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1573     PtrTy,                                     // p_lower
1574     PtrTy,                                     // p_upper
1575     PtrTy,                                     // p_stride
1576     ITy,                                       // incr
1577     ITy                                        // chunk
1578   };
1579   auto *FnTy =
1580       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1581   return CGM.CreateRuntimeFunction(FnTy, Name);
1582 }
1583 
1584 llvm::FunctionCallee
1585 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1586   assert((IVSize == 32 || IVSize == 64) &&
1587          "IV size is not compatible with the omp runtime");
1588   StringRef Name =
1589       IVSize == 32
1590           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1591           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1592   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1593   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1594                                CGM.Int32Ty,           // tid
1595                                CGM.Int32Ty,           // schedtype
1596                                ITy,                   // lower
1597                                ITy,                   // upper
1598                                ITy,                   // stride
1599                                ITy                    // chunk
1600   };
1601   auto *FnTy =
1602       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1603   return CGM.CreateRuntimeFunction(FnTy, Name);
1604 }
1605 
1606 llvm::FunctionCallee
1607 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1608   assert((IVSize == 32 || IVSize == 64) &&
1609          "IV size is not compatible with the omp runtime");
1610   StringRef Name =
1611       IVSize == 32
1612           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1613           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1614   llvm::Type *TypeParams[] = {
1615       getIdentTyPointerTy(), // loc
1616       CGM.Int32Ty,           // tid
1617   };
1618   auto *FnTy =
1619       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1620   return CGM.CreateRuntimeFunction(FnTy, Name);
1621 }
1622 
1623 llvm::FunctionCallee
1624 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1625   assert((IVSize == 32 || IVSize == 64) &&
1626          "IV size is not compatible with the omp runtime");
1627   StringRef Name =
1628       IVSize == 32
1629           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1630           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1631   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1632   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1633   llvm::Type *TypeParams[] = {
1634     getIdentTyPointerTy(),                     // loc
1635     CGM.Int32Ty,                               // tid
1636     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1637     PtrTy,                                     // p_lower
1638     PtrTy,                                     // p_upper
1639     PtrTy                                      // p_stride
1640   };
1641   auto *FnTy =
1642       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1643   return CGM.CreateRuntimeFunction(FnTy, Name);
1644 }
1645 
1646 /// Obtain information that uniquely identifies a target entry. This
1647 /// consists of the file and device IDs as well as line number associated with
1648 /// the relevant entry source location.
1649 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1650                                      unsigned &DeviceID, unsigned &FileID,
1651                                      unsigned &LineNum) {
1652   SourceManager &SM = C.getSourceManager();
1653 
1654   // The loc should be always valid and have a file ID (the user cannot use
1655   // #pragma directives in macros)
1656 
1657   assert(Loc.isValid() && "Source location is expected to be always valid.");
1658 
1659   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1660   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1661 
1662   llvm::sys::fs::UniqueID ID;
1663   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1664     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1665     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1666     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1667       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1668           << PLoc.getFilename() << EC.message();
1669   }
1670 
1671   DeviceID = ID.getDevice();
1672   FileID = ID.getFile();
1673   LineNum = PLoc.getLine();
1674 }
1675 
/// For a 'declare target link' variable (or a 'to' variable under unified
/// shared memory), return the address of the weak reference-pointer global
/// ("..._decl_tgt_ref_ptr") through which the variable is accessed; returns
/// an invalid Address otherwise.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // In simd-only mode no offloading machinery is emitted.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Internal-linkage variables get the file ID mixed into the name to
        // keep the reference pointer unique across translation units.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // First use: create the weak pointer global and register the variable
      // with the offloading machinery.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized to the variable's address; on
      // the device the runtime fills it in.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1714 
1715 llvm::Constant *
1716 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1717   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1718          !CGM.getContext().getTargetInfo().isTLSSupported());
1719   // Lookup the entry, lazily creating it if necessary.
1720   std::string Suffix = getName({"cache", ""});
1721   return getOrCreateInternalVariable(
1722       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1723 }
1724 
1725 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1726                                                 const VarDecl *VD,
1727                                                 Address VDAddr,
1728                                                 SourceLocation Loc) {
1729   if (CGM.getLangOpts().OpenMPUseTLS &&
1730       CGM.getContext().getTargetInfo().isTLSSupported())
1731     return VDAddr;
1732 
1733   llvm::Type *VarTy = VDAddr.getElementType();
1734   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1735                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1736                                                        CGM.Int8PtrTy),
1737                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1738                          getOrCreateThreadPrivateCache(VD)};
1739   return Address(CGF.EmitRuntimeCall(
1740                      OMPBuilder.getOrCreateRuntimeFunction(
1741                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1742                      Args),
1743                  VDAddr.getAlignment());
1744 }
1745 
1746 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1747     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1748     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1749   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1750   // library.
1751   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1752   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1753                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1754                       OMPLoc);
1755   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1756   // to register constructor/destructor for variable.
1757   llvm::Value *Args[] = {
1758       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1759       Ctor, CopyCtor, Dtor};
1760   CGF.EmitRuntimeCall(
1761       OMPBuilder.getOrCreateRuntimeFunction(
1762           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1763       Args);
1764 }
1765 
/// Emit, once per variable, the runtime registration of a threadprivate
/// variable: an optional ctor that re-runs the declaration's initializer on a
/// thread's copy and an optional dtor that destroys it. When \p CGF is null,
/// the registration is wrapped in a new global init function which is
/// returned; otherwise it is emitted into \p CGF and null is returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Native TLS needs no runtime registration at all.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Only the first definition per mangled name is processed.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD. Signature: void *(void *dst).
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer argument and initialize the object it
      // points to with the declaration's initializer.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The runtime expects the ctor to return its argument.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD. Signature: void (void *obj).
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing ctor/dtor slots are passed to the runtime as typed null
    // pointers.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No current function: wrap the registration in its own global init
      // function and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1885 
/// Emit, once per variable, the ctor/dtor offload entries for a 'declare
/// target to' variable: on the device, real init/cleanup functions; on the
/// host, placeholder globals that keep host and device entry tables in sync.
/// Returns true when the caller must not emit the host-side definition
/// (i.e. when compiling for the device).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do when no offloading targets are configured.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  // 'link' variables (and 'to' under unified shared memory) are accessed via
  // a reference pointer instead and get no ctor/dtor entries here.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Process each mangled name only once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive even though nothing in the module references it;
      // the runtime invokes it through the offload entry table.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host only a placeholder global is needed so that host and
      // device offload entry tables line up.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive; it is only reached via the offload entry table.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder, mirroring the ctor handling above.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2000 
/// Return the address of a compiler-generated ("artificial") threadprivate
/// variable of type \p VarType, unique per \p Name. Uses native TLS when
/// available, otherwise the runtime's cached threadprivate mechanism.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // With TLS support, simply mark the backing global thread-local.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise call __kmpc_threadprivate_cached(loc, tid, &var, size, &cache)
  // to obtain (and lazily create) this thread's copy.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // The runtime returns a void*; cast it back to the variable's own type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2031 
2032 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2033                                    const RegionCodeGenTy &ThenGen,
2034                                    const RegionCodeGenTy &ElseGen) {
2035   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2036 
2037   // If the condition constant folds and can be elided, try to avoid emitting
2038   // the condition and the dead arm of the if/else.
2039   bool CondConstant;
2040   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2041     if (CondConstant)
2042       ThenGen(CGF);
2043     else
2044       ElseGen(CGF);
2045     return;
2046   }
2047 
2048   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2049   // emit the conditional branch.
2050   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2051   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2052   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2053   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2054 
2055   // Emit the 'then' code.
2056   CGF.EmitBlock(ThenBlock);
2057   ThenGen(CGF);
2058   CGF.EmitBranch(ContBlock);
2059   // Emit the 'else' code if present.
2060   // There is no need to emit line number for unconditional branch.
2061   (void)ApplyDebugLocation::CreateEmpty(CGF);
2062   CGF.EmitBlock(ElseBlock);
2063   ElseGen(CGF);
2064   // There is no need to emit line number for unconditional branch.
2065   (void)ApplyDebugLocation::CreateEmpty(CGF);
2066   CGF.EmitBranch(ContBlock);
2067   // Emit the continuation block for code after the if.
2068   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2069 }
2070 
/// Emits the call sequence for an OpenMP 'parallel' directive: either a
/// __kmpc_fork_call of the outlined function (parallel execution) or, when
/// \p IfCond evaluates to false at runtime, a direct call of the outlined
/// function bracketed by __kmpc_serialized_parallel /
/// __kmpc_end_serialized_parallel. With no 'if' clause (IfCond == nullptr)
/// only the fork path is emitted.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2140 
2141 // If we're inside an (outlined) parallel region, use the region info's
2142 // thread-ID variable (it is passed in a first argument of the outlined function
2143 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2144 // regular serial code region, get thread ID by calling kmp_int32
2145 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2146 // return the address of that temp.
2147 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2148                                              SourceLocation Loc) {
2149   if (auto *OMPRegionInfo =
2150           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2151     if (OMPRegionInfo->getThreadIDVariable())
2152       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2153 
2154   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2155   QualType Int32Ty =
2156       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2157   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2158   CGF.EmitStoreOfScalar(ThreadID,
2159                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2160 
2161   return ThreadIDTemp;
2162 }
2163 
/// Returns (creating on first request) a module-internal global of type
/// \p Ty uniquely identified by \p Name. The global is common-linkage and
/// zero-initialized, and is cached in InternalVars so repeated requests for
/// the same name yield the same variable.
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  // Render the Twine into stable storage usable as a StringMap key.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  // try_emplace either finds the cached entry or inserts a null placeholder.
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    // Cache hit: all requesters must agree on the variable's type.
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  // Cache miss: create the global and fill the placeholder slot. The map key
  // (Elem.first()) doubles as the global's name.
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}
2183 
2184 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2185   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2186   std::string Name = getName({Prefix, "var"});
2187   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2188 }
2189 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Emits EnterCallee(EnterArgs) when the region is entered and
/// ExitCallee(ExitArgs) when it is exited. If \p Conditional is true the
/// enter call's result guards the region: the body is emitted under
/// "if (result != 0)", and the caller must invoke Done() after emitting the
/// region to close the guard with its continuation block.
class CGOpenMPRegionInfo;
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  // Continuation block of the guard; set by Enter() only when Conditional.
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  // Emit the enter call; in the conditional case open the guarded region.
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // Close the guarded region opened by Enter(). Only meaningful when
  // Conditional is true — ContBlock stays null otherwise.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  // Emit the exit call.
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2228 
2229 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2230                                          StringRef CriticalName,
2231                                          const RegionCodeGenTy &CriticalOpGen,
2232                                          SourceLocation Loc, const Expr *Hint) {
2233   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2234   // CriticalOpGen();
2235   // __kmpc_end_critical(ident_t *, gtid, Lock);
2236   // Prepare arguments and build a call to __kmpc_critical
2237   if (!CGF.HaveInsertPoint())
2238     return;
2239   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2240                          getCriticalRegionLock(CriticalName)};
2241   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2242                                                 std::end(Args));
2243   if (Hint) {
2244     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2245         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2246   }
2247   CommonActionTy Action(
2248       OMPBuilder.getOrCreateRuntimeFunction(
2249           CGM.getModule(),
2250           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2251       EnterArgs,
2252       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2253                                             OMPRTL___kmpc_end_critical),
2254       Args);
2255   CriticalOpGen.setAction(Action);
2256   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2257 }
2258 
2259 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2260                                        const RegionCodeGenTy &MasterOpGen,
2261                                        SourceLocation Loc) {
2262   if (!CGF.HaveInsertPoint())
2263     return;
2264   // if(__kmpc_master(ident_t *, gtid)) {
2265   //   MasterOpGen();
2266   //   __kmpc_end_master(ident_t *, gtid);
2267   // }
2268   // Prepare arguments and build a call to __kmpc_master
2269   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2270   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2271                             CGM.getModule(), OMPRTL___kmpc_master),
2272                         Args,
2273                         OMPBuilder.getOrCreateRuntimeFunction(
2274                             CGM.getModule(), OMPRTL___kmpc_end_master),
2275                         Args,
2276                         /*Conditional=*/true);
2277   MasterOpGen.setAction(Action);
2278   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2279   Action.Done(CGF);
2280 }
2281 
2282 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2283                                        const RegionCodeGenTy &MaskedOpGen,
2284                                        SourceLocation Loc, const Expr *Filter) {
2285   if (!CGF.HaveInsertPoint())
2286     return;
2287   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2288   //   MaskedOpGen();
2289   //   __kmpc_end_masked(iden_t *, gtid);
2290   // }
2291   // Prepare arguments and build a call to __kmpc_masked
2292   llvm::Value *FilterVal = Filter
2293                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2294                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2295   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2296                          FilterVal};
2297   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2298                             getThreadID(CGF, Loc)};
2299   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2300                             CGM.getModule(), OMPRTL___kmpc_masked),
2301                         Args,
2302                         OMPBuilder.getOrCreateRuntimeFunction(
2303                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2304                         ArgsEnd,
2305                         /*Conditional=*/true);
2306   MaskedOpGen.setAction(Action);
2307   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2308   Action.Done(CGF);
2309 }
2310 
2311 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2312                                         SourceLocation Loc) {
2313   if (!CGF.HaveInsertPoint())
2314     return;
2315   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2316     OMPBuilder.createTaskyield(CGF.Builder);
2317   } else {
2318     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2319     llvm::Value *Args[] = {
2320         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2321         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2322     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2323                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2324                         Args);
2325   }
2326 
2327   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2328     Region->emitUntiedSwitch(CGF);
2329 }
2330 
2331 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2332                                           const RegionCodeGenTy &TaskgroupOpGen,
2333                                           SourceLocation Loc) {
2334   if (!CGF.HaveInsertPoint())
2335     return;
2336   // __kmpc_taskgroup(ident_t *, gtid);
2337   // TaskgroupOpGen();
2338   // __kmpc_end_taskgroup(ident_t *, gtid);
2339   // Prepare arguments and build a call to __kmpc_taskgroup
2340   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2341   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2342                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2343                         Args,
2344                         OMPBuilder.getOrCreateRuntimeFunction(
2345                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2346                         Args);
2347   TaskgroupOpGen.setAction(Action);
2348   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2349 }
2350 
2351 /// Given an array of pointers to variables, project the address of a
2352 /// given variable.
2353 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2354                                       unsigned Index, const VarDecl *Var) {
2355   // Pull out the pointer to the variable.
2356   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2357   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2358 
2359   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2360   Addr = CGF.Builder.CreateElementBitCast(
2361       Addr, CGF.ConvertTypeForMem(Var->getType()));
2362   return Addr;
2363 }
2364 
/// Emits the helper function passed to __kmpc_copyprivate:
///   void .omp.copyprivate.copy_func(void *LHSArg, void *RHSArg)
/// Both arguments point to arrays of void* (one slot per copyprivate
/// variable); the body performs an element-wise assignment using the
/// provided \p AssignmentOps.
/// NOTE(review): the call site in emitSingleRegion passes the clause's
/// SrcExprs as \p DestExprs and DstExprs as \p SrcExprs — the naming here
/// reflects the copy function's own LHS/RHS view; confirm before renaming.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // Emit the body with a fresh CodeGenFunction; the surrounding function's
  // state must not leak into the helper.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2418 
/// Emits a 'single' region guarded by __kmpc_single/__kmpc_end_single and,
/// when copyprivate variables are present, broadcasts the executing
/// thread's values to the team via __kmpc_copyprivate.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four copyprivate arrays are parallel: entry I of each describes the
  // same variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // 'did_it' records whether this thread executed the single region; it is
  // only needed when there is copyprivate data to broadcast.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional region opened by the __kmpc_single guard.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2506 
2507 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2508                                         const RegionCodeGenTy &OrderedOpGen,
2509                                         SourceLocation Loc, bool IsThreads) {
2510   if (!CGF.HaveInsertPoint())
2511     return;
2512   // __kmpc_ordered(ident_t *, gtid);
2513   // OrderedOpGen();
2514   // __kmpc_end_ordered(ident_t *, gtid);
2515   // Prepare arguments and build a call to __kmpc_ordered
2516   if (IsThreads) {
2517     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2518     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2519                               CGM.getModule(), OMPRTL___kmpc_ordered),
2520                           Args,
2521                           OMPBuilder.getOrCreateRuntimeFunction(
2522                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2523                           Args);
2524     OrderedOpGen.setAction(Action);
2525     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2526     return;
2527   }
2528   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2529 }
2530 
2531 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2532   unsigned Flags;
2533   if (Kind == OMPD_for)
2534     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2535   else if (Kind == OMPD_sections)
2536     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2537   else if (Kind == OMPD_single)
2538     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2539   else if (Kind == OMPD_barrier)
2540     Flags = OMP_IDENT_BARRIER_EXPL;
2541   else
2542     Flags = OMP_IDENT_BARRIER_IMPL;
2543   return Flags;
2544 }
2545 
2546 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2547     CodeGenFunction &CGF, const OMPLoopDirective &S,
2548     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2549   // Check if the loop directive is actually a doacross loop directive. In this
2550   // case choose static, 1 schedule.
2551   if (llvm::any_of(
2552           S.getClausesOfKind<OMPOrderedClause>(),
2553           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2554     ScheduleKind = OMPC_SCHEDULE_static;
2555     // Chunk size is 1 in this case.
2556     llvm::APInt ChunkSize(32, 1);
2557     ChunkExpr = IntegerLiteral::Create(
2558         CGF.getContext(), ChunkSize,
2559         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2560         SourceLocation());
2561   }
2562 }
2563 
/// Emits a barrier at \p Loc: __kmpc_barrier normally, or
/// __kmpc_cancel_barrier inside a cancellable region so the barrier's
/// result can redirect cancelled threads out of the construct.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  // The directive kind selects the OMP_IDENT_BARRIER_* flag encoded into the
  // location so the runtime knows which construct the barrier belongs to.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        // A nonzero result means the region was cancelled; branch to the
        // construct's cancellation destination through pending cleanups.
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2613 
2614 /// Map the OpenMP loop schedule to the runtime enumeration.
2615 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2616                                           bool Chunked, bool Ordered) {
2617   switch (ScheduleKind) {
2618   case OMPC_SCHEDULE_static:
2619     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2620                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2621   case OMPC_SCHEDULE_dynamic:
2622     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2623   case OMPC_SCHEDULE_guided:
2624     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2625   case OMPC_SCHEDULE_runtime:
2626     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2627   case OMPC_SCHEDULE_auto:
2628     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2629   case OMPC_SCHEDULE_unknown:
2630     assert(!Chunked && "chunk was specified but schedule kind not known");
2631     return Ordered ? OMP_ord_static : OMP_sch_static;
2632   }
2633   llvm_unreachable("Unexpected runtime schedule");
2634 }
2635 
2636 /// Map the OpenMP distribute schedule to the runtime enumeration.
2637 static OpenMPSchedType
2638 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2639   // only static is allowed for dist_schedule
2640   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2641 }
2642 
2643 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2644                                          bool Chunked) const {
2645   OpenMPSchedType Schedule =
2646       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2647   return Schedule == OMP_sch_static;
2648 }
2649 
2650 bool CGOpenMPRuntime::isStaticNonchunked(
2651     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2652   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2653   return Schedule == OMP_dist_sch_static;
2654 }
2655 
2656 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2657                                       bool Chunked) const {
2658   OpenMPSchedType Schedule =
2659       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2660   return Schedule == OMP_sch_static_chunked;
2661 }
2662 
2663 bool CGOpenMPRuntime::isStaticChunked(
2664     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2665   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2666   return Schedule == OMP_dist_sch_static_chunked;
2667 }
2668 
2669 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2670   OpenMPSchedType Schedule =
2671       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2672   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2673   return Schedule != OMP_sch_static;
2674 }
2675 
2676 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2677                                   OpenMPScheduleClauseModifier M1,
2678                                   OpenMPScheduleClauseModifier M2) {
2679   int Modifier = 0;
2680   switch (M1) {
2681   case OMPC_SCHEDULE_MODIFIER_monotonic:
2682     Modifier = OMP_sch_modifier_monotonic;
2683     break;
2684   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2685     Modifier = OMP_sch_modifier_nonmonotonic;
2686     break;
2687   case OMPC_SCHEDULE_MODIFIER_simd:
2688     if (Schedule == OMP_sch_static_chunked)
2689       Schedule = OMP_sch_static_balanced_chunked;
2690     break;
2691   case OMPC_SCHEDULE_MODIFIER_last:
2692   case OMPC_SCHEDULE_MODIFIER_unknown:
2693     break;
2694   }
2695   switch (M2) {
2696   case OMPC_SCHEDULE_MODIFIER_monotonic:
2697     Modifier = OMP_sch_modifier_monotonic;
2698     break;
2699   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2700     Modifier = OMP_sch_modifier_nonmonotonic;
2701     break;
2702   case OMPC_SCHEDULE_MODIFIER_simd:
2703     if (Schedule == OMP_sch_static_chunked)
2704       Schedule = OMP_sch_static_balanced_chunked;
2705     break;
2706   case OMPC_SCHEDULE_MODIFIER_last:
2707   case OMPC_SCHEDULE_MODIFIER_unknown:
2708     break;
2709   }
2710   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2711   // If the static schedule kind is specified or if the ordered clause is
2712   // specified, and if the nonmonotonic modifier is not specified, the effect is
2713   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2714   // modifier is specified, the effect is as if the nonmonotonic modifier is
2715   // specified.
2716   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2717     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2718           Schedule == OMP_sch_static_balanced_chunked ||
2719           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2720           Schedule == OMP_dist_sch_static_chunked ||
2721           Schedule == OMP_dist_sch_static))
2722       Modifier = OMP_sch_modifier_nonmonotonic;
2723   }
2724   return Schedule | Modifier;
2725 }
2726 
2727 void CGOpenMPRuntime::emitForDispatchInit(
2728     CodeGenFunction &CGF, SourceLocation Loc,
2729     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2730     bool Ordered, const DispatchRTInput &DispatchValues) {
2731   if (!CGF.HaveInsertPoint())
2732     return;
2733   OpenMPSchedType Schedule = getRuntimeSchedule(
2734       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2735   assert(Ordered ||
2736          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2737           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2738           Schedule != OMP_sch_static_balanced_chunked));
2739   // Call __kmpc_dispatch_init(
2740   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2741   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2742   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2743 
2744   // If the Chunk was not specified in the clause - use default value 1.
2745   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2746                                             : CGF.Builder.getIntN(IVSize, 1);
2747   llvm::Value *Args[] = {
2748       emitUpdateLocation(CGF, Loc),
2749       getThreadID(CGF, Loc),
2750       CGF.Builder.getInt32(addMonoNonMonoModifier(
2751           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2752       DispatchValues.LB,                                     // Lower
2753       DispatchValues.UB,                                     // Upper
2754       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2755       Chunk                                                  // Chunk
2756   };
2757   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2758 }
2759 
2760 static void emitForStaticInitCall(
2761     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2762     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2763     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2764     const CGOpenMPRuntime::StaticRTInput &Values) {
2765   if (!CGF.HaveInsertPoint())
2766     return;
2767 
2768   assert(!Values.Ordered);
2769   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2770          Schedule == OMP_sch_static_balanced_chunked ||
2771          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2772          Schedule == OMP_dist_sch_static ||
2773          Schedule == OMP_dist_sch_static_chunked);
2774 
2775   // Call __kmpc_for_static_init(
2776   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2777   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2778   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2779   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2780   llvm::Value *Chunk = Values.Chunk;
2781   if (Chunk == nullptr) {
2782     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2783             Schedule == OMP_dist_sch_static) &&
2784            "expected static non-chunked schedule");
2785     // If the Chunk was not specified in the clause - use default value 1.
2786     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2787   } else {
2788     assert((Schedule == OMP_sch_static_chunked ||
2789             Schedule == OMP_sch_static_balanced_chunked ||
2790             Schedule == OMP_ord_static_chunked ||
2791             Schedule == OMP_dist_sch_static_chunked) &&
2792            "expected static chunked schedule");
2793   }
2794   llvm::Value *Args[] = {
2795       UpdateLocation,
2796       ThreadId,
2797       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2798                                                   M2)), // Schedule type
2799       Values.IL.getPointer(),                           // &isLastIter
2800       Values.LB.getPointer(),                           // &LB
2801       Values.UB.getPointer(),                           // &UB
2802       Values.ST.getPointer(),                           // &Stride
2803       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2804       Chunk                                             // Chunk
2805   };
2806   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2807 }
2808 
2809 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2810                                         SourceLocation Loc,
2811                                         OpenMPDirectiveKind DKind,
2812                                         const OpenMPScheduleTy &ScheduleKind,
2813                                         const StaticRTInput &Values) {
2814   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2815       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2816   assert(isOpenMPWorksharingDirective(DKind) &&
2817          "Expected loop-based or sections-based directive.");
2818   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2819                                              isOpenMPLoopDirective(DKind)
2820                                                  ? OMP_IDENT_WORK_LOOP
2821                                                  : OMP_IDENT_WORK_SECTIONS);
2822   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2823   llvm::FunctionCallee StaticInitFunction =
2824       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2825   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2826   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2827                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2828 }
2829 
2830 void CGOpenMPRuntime::emitDistributeStaticInit(
2831     CodeGenFunction &CGF, SourceLocation Loc,
2832     OpenMPDistScheduleClauseKind SchedKind,
2833     const CGOpenMPRuntime::StaticRTInput &Values) {
2834   OpenMPSchedType ScheduleNum =
2835       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2836   llvm::Value *UpdatedLocation =
2837       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2838   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2839   llvm::FunctionCallee StaticInitFunction =
2840       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2841   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2842                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2843                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2844 }
2845 
2846 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2847                                           SourceLocation Loc,
2848                                           OpenMPDirectiveKind DKind) {
2849   if (!CGF.HaveInsertPoint())
2850     return;
2851   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2852   llvm::Value *Args[] = {
2853       emitUpdateLocation(CGF, Loc,
2854                          isOpenMPDistributeDirective(DKind)
2855                              ? OMP_IDENT_WORK_DISTRIBUTE
2856                              : isOpenMPLoopDirective(DKind)
2857                                    ? OMP_IDENT_WORK_LOOP
2858                                    : OMP_IDENT_WORK_SECTIONS),
2859       getThreadID(CGF, Loc)};
2860   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2861   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2862                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2863                       Args);
2864 }
2865 
2866 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2867                                                  SourceLocation Loc,
2868                                                  unsigned IVSize,
2869                                                  bool IVSigned) {
2870   if (!CGF.HaveInsertPoint())
2871     return;
2872   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2873   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2874   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2875 }
2876 
2877 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2878                                           SourceLocation Loc, unsigned IVSize,
2879                                           bool IVSigned, Address IL,
2880                                           Address LB, Address UB,
2881                                           Address ST) {
2882   // Call __kmpc_dispatch_next(
2883   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2884   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2885   //          kmp_int[32|64] *p_stride);
2886   llvm::Value *Args[] = {
2887       emitUpdateLocation(CGF, Loc),
2888       getThreadID(CGF, Loc),
2889       IL.getPointer(), // &isLastIter
2890       LB.getPointer(), // &Lower
2891       UB.getPointer(), // &Upper
2892       ST.getPointer()  // &Stride
2893   };
2894   llvm::Value *Call =
2895       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2896   return CGF.EmitScalarConversion(
2897       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2898       CGF.getContext().BoolTy, Loc);
2899 }
2900 
2901 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2902                                            llvm::Value *NumThreads,
2903                                            SourceLocation Loc) {
2904   if (!CGF.HaveInsertPoint())
2905     return;
2906   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2907   llvm::Value *Args[] = {
2908       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2909       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2910   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2911                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2912                       Args);
2913 }
2914 
2915 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2916                                          ProcBindKind ProcBind,
2917                                          SourceLocation Loc) {
2918   if (!CGF.HaveInsertPoint())
2919     return;
2920   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2921   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2922   llvm::Value *Args[] = {
2923       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2924       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2925   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2926                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2927                       Args);
2928 }
2929 
2930 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2931                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2932   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2933     OMPBuilder.createFlush(CGF.Builder);
2934   } else {
2935     if (!CGF.HaveInsertPoint())
2936       return;
2937     // Build call void __kmpc_flush(ident_t *loc)
2938     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2939                             CGM.getModule(), OMPRTL___kmpc_flush),
2940                         emitUpdateLocation(CGF, Loc));
2941   }
2942 }
2943 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): the enumerator order appears significant — these values are
/// presumably used to index the corresponding fields of the generated
/// kmp_task_t record; confirm the record layout before reordering.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2969 
2970 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2971   return OffloadEntriesTargetRegion.empty() &&
2972          OffloadEntriesDeviceGlobalVar.empty();
2973 }
2974 
2975 /// Initialize target region entry.
2976 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2977     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2978                                     StringRef ParentName, unsigned LineNum,
2979                                     unsigned Order) {
2980   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2981                                              "only required for the device "
2982                                              "code generation.");
2983   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2984       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2985                                    OMPTargetRegionEntryTargetRegion);
2986   ++OffloadingEntriesNum;
2987 }
2988 
2989 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2990     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2991                                   StringRef ParentName, unsigned LineNum,
2992                                   llvm::Constant *Addr, llvm::Constant *ID,
2993                                   OMPTargetRegionEntryKind Flags) {
2994   // If we are emitting code for a target, the entry is already initialized,
2995   // only has to be registered.
2996   if (CGM.getLangOpts().OpenMPIsDevice) {
2997     // This could happen if the device compilation is invoked standalone.
2998     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
2999       return;
3000     auto &Entry =
3001         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3002     Entry.setAddress(Addr);
3003     Entry.setID(ID);
3004     Entry.setFlags(Flags);
3005   } else {
3006     if (Flags ==
3007             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3008         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3009                                  /*IgnoreAddressId*/ true))
3010       return;
3011     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3012            "Target region entry already registered!");
3013     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3014     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3015     ++OffloadingEntriesNum;
3016   }
3017 }
3018 
3019 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3020     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3021     bool IgnoreAddressId) const {
3022   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3023   if (PerDevice == OffloadEntriesTargetRegion.end())
3024     return false;
3025   auto PerFile = PerDevice->second.find(FileID);
3026   if (PerFile == PerDevice->second.end())
3027     return false;
3028   auto PerParentName = PerFile->second.find(ParentName);
3029   if (PerParentName == PerFile->second.end())
3030     return false;
3031   auto PerLine = PerParentName->second.find(LineNum);
3032   if (PerLine == PerParentName->second.end())
3033     return false;
3034   // Fail if this entry is already registered.
3035   if (!IgnoreAddressId &&
3036       (PerLine->second.getAddress() || PerLine->second.getID()))
3037     return false;
3038   return true;
3039 }
3040 
3041 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3042     const OffloadTargetRegionEntryInfoActTy &Action) {
3043   // Scan all target region entries and perform the provided action.
3044   for (const auto &D : OffloadEntriesTargetRegion)
3045     for (const auto &F : D.second)
3046       for (const auto &P : F.second)
3047         for (const auto &L : P.second)
3048           Action(D.first, F.first, P.first(), L.first, L.second);
3049 }
3050 
// Create a placeholder entry for a declare target global variable while
// replaying the host metadata on the device side.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // try_emplace leaves any existing entry for this name untouched.
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3061 
3062 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3063     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3064                                      CharUnits VarSize,
3065                                      OMPTargetGlobalVarEntryKind Flags,
3066                                      llvm::GlobalValue::LinkageTypes Linkage) {
3067   if (CGM.getLangOpts().OpenMPIsDevice) {
3068     // This could happen if the device compilation is invoked standalone.
3069     if (!hasDeviceGlobalVarEntryInfo(VarName))
3070       return;
3071     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3072     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3073       if (Entry.getVarSize().isZero()) {
3074         Entry.setVarSize(VarSize);
3075         Entry.setLinkage(Linkage);
3076       }
3077       return;
3078     }
3079     Entry.setVarSize(VarSize);
3080     Entry.setLinkage(Linkage);
3081     Entry.setAddress(Addr);
3082   } else {
3083     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3084       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3085       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3086              "Entry not initialized!");
3087       if (Entry.getVarSize().isZero()) {
3088         Entry.setVarSize(VarSize);
3089         Entry.setLinkage(Linkage);
3090       }
3091       return;
3092     }
3093     OffloadEntriesDeviceGlobalVar.try_emplace(
3094         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3095     ++OffloadingEntriesNum;
3096   }
3097 }
3098 
3099 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3100     actOnDeviceGlobalVarEntriesInfo(
3101         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3102   // Scan all target region entries and perform the provided action.
3103   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3104     Action(E.getKey(), E.getValue());
3105 }
3106 
3107 void CGOpenMPRuntime::createOffloadEntry(
3108     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3109     llvm::GlobalValue::LinkageTypes Linkage) {
3110   StringRef Name = Addr->getName();
3111   llvm::Module &M = CGM.getModule();
3112   llvm::LLVMContext &C = M.getContext();
3113 
3114   // Create constant string with the name.
3115   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3116 
3117   std::string StringName = getName({"omp_offloading", "entry_name"});
3118   auto *Str = new llvm::GlobalVariable(
3119       M, StrPtrInit->getType(), /*isConstant=*/true,
3120       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3121   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3122 
3123   llvm::Constant *Data[] = {
3124       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3125       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3126       llvm::ConstantInt::get(CGM.SizeTy, Size),
3127       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3128       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3129   std::string EntryName = getName({"omp_offloading", "entry", ""});
3130   llvm::GlobalVariable *Entry = createGlobalStruct(
3131       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3132       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3133 
3134   // The entry has to be created in the section the linker expects it to be.
3135   Entry->setSection("omp_offloading_entries");
3136 }
3137 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are collected here indexed by their creation order, so the
  // emitted table is deterministic.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a SourceLocation for diagnostics by matching the
        // (device, file) unique ID against the source manager's file table.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Now that all entries are collected in creation order, emit the actual
  // __tgt_offload_entry globals (or diagnose incomplete entries).
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // 'to' entries are skipped on the device under unified shared memory.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        // 'link' entries are only materialized on the host.
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3311 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR bitcode into a throwaway module in a local context;
  // only the named metadata is consumed.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to decode an operand as an integer constant or a string.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operand layout must match
    // what createOffloadEntriesAndInfoMetadata() emitted.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3380 
3381 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3382   if (!KmpRoutineEntryPtrTy) {
3383     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3384     ASTContext &C = CGM.getContext();
3385     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3386     FunctionProtoType::ExtProtoInfo EPI;
3387     KmpRoutineEntryPtrQTy = C.getPointerType(
3388         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3389     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3390   }
3391 }
3392 
3393 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3394   // Make sure the type of the entry is already created. This is the type we
3395   // have to create:
3396   // struct __tgt_offload_entry{
3397   //   void      *addr;       // Pointer to the offload entry info.
3398   //                          // (function or global)
3399   //   char      *name;       // Name of the function or global.
3400   //   size_t     size;       // Size of the entry info (0 if it a function).
3401   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3402   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3403   // };
3404   if (TgtOffloadEntryQTy.isNull()) {
3405     ASTContext &C = CGM.getContext();
3406     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3407     RD->startDefinition();
3408     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3409     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3410     addFieldToRecordDecl(C, RD, C.getSizeType());
3411     addFieldToRecordDecl(
3412         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3413     addFieldToRecordDecl(
3414         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3415     RD->completeDefinition();
3416     RD->addAttr(PackedAttr::CreateImplicit(C));
3417     TgtOffloadEntryQTy = C.getRecordType(RD);
3418   }
3419   return TgtOffloadEntryQTy;
3420 }
3421 
namespace {
/// Descriptor for one privatized entity in a task-based directive: the
/// captured reference expression, the original declaration, the task-local
/// copy, and (for element-wise initialization) the per-element init variable.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  // Constructor used when only the original declaration is available; the
  // three other members stay null, which isLocalPrivate() keys off.
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  /// True when this entry was built with the VarDecl-only constructor,
  /// i.e. it describes a task-local variable rather than a variable coming
  /// from an explicit privatization clause.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
// Alignment of the private copy paired with its descriptor; used to sort and
// lay out the fields of the generated .kmp_privates.t record.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
3439 
3440 static bool isAllocatableDecl(const VarDecl *VD) {
3441   const VarDecl *CVD = VD->getCanonicalDecl();
3442   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3443     return false;
3444   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3445   // Use the default allocation.
3446   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3447             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3448            !AA->getAllocator());
3449 }
3450 
3451 static RecordDecl *
3452 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3453   if (!Privates.empty()) {
3454     ASTContext &C = CGM.getContext();
3455     // Build struct .kmp_privates_t. {
3456     //         /*  private vars  */
3457     //       };
3458     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3459     RD->startDefinition();
3460     for (const auto &Pair : Privates) {
3461       const VarDecl *VD = Pair.second.Original;
3462       QualType Type = VD->getType().getNonReferenceType();
3463       // If the private variable is a local variable with lvalue ref type,
3464       // allocate the pointer instead of the pointee type.
3465       if (Pair.second.isLocalPrivate()) {
3466         if (VD->getType()->isLValueReferenceType())
3467           Type = C.getPointerType(Type);
3468         if (isAllocatableDecl(VD))
3469           Type = C.getPointerType(Type);
3470       }
3471       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3472       if (VD->hasAttrs()) {
3473         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3474              E(VD->getAttrs().end());
3475              I != E; ++I)
3476           FD->addAttr(*I);
3477       }
3478     }
3479     RD->completeDefinition();
3480     return RD;
3481   }
3482   return nullptr;
3483 }
3484 
3485 static RecordDecl *
3486 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3487                          QualType KmpInt32Ty,
3488                          QualType KmpRoutineEntryPointerQTy) {
3489   ASTContext &C = CGM.getContext();
3490   // Build struct kmp_task_t {
3491   //         void *              shareds;
3492   //         kmp_routine_entry_t routine;
3493   //         kmp_int32           part_id;
3494   //         kmp_cmplrdata_t data1;
3495   //         kmp_cmplrdata_t data2;
3496   // For taskloops additional fields:
3497   //         kmp_uint64          lb;
3498   //         kmp_uint64          ub;
3499   //         kmp_int64           st;
3500   //         kmp_int32           liter;
3501   //         void *              reductions;
3502   //       };
3503   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3504   UD->startDefinition();
3505   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3506   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3507   UD->completeDefinition();
3508   QualType KmpCmplrdataTy = C.getRecordType(UD);
3509   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3510   RD->startDefinition();
3511   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3512   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3513   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3514   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3515   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3516   if (isOpenMPTaskLoopDirective(Kind)) {
3517     QualType KmpUInt64Ty =
3518         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3519     QualType KmpInt64Ty =
3520         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3521     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3522     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3523     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3524     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3525     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3526   }
3527   RD->completeDefinition();
3528   return RD;
3529 }
3530 
3531 static RecordDecl *
3532 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3533                                      ArrayRef<PrivateDataTy> Privates) {
3534   ASTContext &C = CGM.getContext();
3535   // Build struct kmp_task_t_with_privates {
3536   //         kmp_task_t task_data;
3537   //         .kmp_privates_t. privates;
3538   //       };
3539   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3540   RD->startDefinition();
3541   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3542   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3543     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3544   RD->completeDefinition();
3545   return RD;
3546 }
3547 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Prototype: kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *restrict).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference 'tt'. Base is the embedded kmp_task_t (first field of the
  // wrapper record).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address (not loaded) so the outlined function can
  // update it.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load tt->task_data.shareds and cast it to the callee's shareds pointer
  // type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // &tt->privates, or a null void* when the wrapper has no privates field.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to task and taskloop entry points.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass lb, ub, st, liter and the reductions
    // pointer, all loaded from the kmp_task_t fields.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3662 
3663 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3664                                             SourceLocation Loc,
3665                                             QualType KmpInt32Ty,
3666                                             QualType KmpTaskTWithPrivatesPtrQTy,
3667                                             QualType KmpTaskTWithPrivatesQTy) {
3668   ASTContext &C = CGM.getContext();
3669   FunctionArgList Args;
3670   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3671                             ImplicitParamDecl::Other);
3672   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3673                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3674                                 ImplicitParamDecl::Other);
3675   Args.push_back(&GtidArg);
3676   Args.push_back(&TaskTypeArg);
3677   const auto &DestructorFnInfo =
3678       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3679   llvm::FunctionType *DestructorFnTy =
3680       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3681   std::string Name =
3682       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3683   auto *DestructorFn =
3684       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3685                              Name, &CGM.getModule());
3686   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3687                                     DestructorFnInfo);
3688   DestructorFn->setDoesNotRecurse();
3689   CodeGenFunction CGF(CGM);
3690   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3691                     Args, Loc, Loc);
3692 
3693   LValue Base = CGF.EmitLoadOfPointerLValue(
3694       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3695       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3696   const auto *KmpTaskTWithPrivatesQTyRD =
3697       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3698   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3699   Base = CGF.EmitLValueForField(Base, *FI);
3700   for (const auto *Field :
3701        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3702     if (QualType::DestructionKind DtorKind =
3703             Field->getType().isDestructedType()) {
3704       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3705       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3706     }
3707   }
3708   CGF.FinishFunction();
3709   return DestructorFn;
3710 }
3711 
3712 /// Emit a privates mapping function for correct handling of private and
3713 /// firstprivate variables.
3714 /// \code
3715 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3716 /// **noalias priv1,...,  <tyn> **noalias privn) {
3717 ///   *priv1 = &.privates.priv1;
3718 ///   ...;
3719 ///   *privn = &.privates.privn;
3720 /// }
3721 /// \endcode
3722 static llvm::Value *
3723 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3724                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3725                                ArrayRef<PrivateDataTy> Privates) {
3726   ASTContext &C = CGM.getContext();
3727   FunctionArgList Args;
3728   ImplicitParamDecl TaskPrivatesArg(
3729       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3730       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3731       ImplicitParamDecl::Other);
3732   Args.push_back(&TaskPrivatesArg);
3733   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3734   unsigned Counter = 1;
3735   for (const Expr *E : Data.PrivateVars) {
3736     Args.push_back(ImplicitParamDecl::Create(
3737         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3738         C.getPointerType(C.getPointerType(E->getType()))
3739             .withConst()
3740             .withRestrict(),
3741         ImplicitParamDecl::Other));
3742     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3743     PrivateVarsPos[VD] = Counter;
3744     ++Counter;
3745   }
3746   for (const Expr *E : Data.FirstprivateVars) {
3747     Args.push_back(ImplicitParamDecl::Create(
3748         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3749         C.getPointerType(C.getPointerType(E->getType()))
3750             .withConst()
3751             .withRestrict(),
3752         ImplicitParamDecl::Other));
3753     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3754     PrivateVarsPos[VD] = Counter;
3755     ++Counter;
3756   }
3757   for (const Expr *E : Data.LastprivateVars) {
3758     Args.push_back(ImplicitParamDecl::Create(
3759         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3760         C.getPointerType(C.getPointerType(E->getType()))
3761             .withConst()
3762             .withRestrict(),
3763         ImplicitParamDecl::Other));
3764     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3765     PrivateVarsPos[VD] = Counter;
3766     ++Counter;
3767   }
3768   for (const VarDecl *VD : Data.PrivateLocals) {
3769     QualType Ty = VD->getType().getNonReferenceType();
3770     if (VD->getType()->isLValueReferenceType())
3771       Ty = C.getPointerType(Ty);
3772     if (isAllocatableDecl(VD))
3773       Ty = C.getPointerType(Ty);
3774     Args.push_back(ImplicitParamDecl::Create(
3775         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3776         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3777         ImplicitParamDecl::Other));
3778     PrivateVarsPos[VD] = Counter;
3779     ++Counter;
3780   }
3781   const auto &TaskPrivatesMapFnInfo =
3782       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3783   llvm::FunctionType *TaskPrivatesMapTy =
3784       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3785   std::string Name =
3786       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3787   auto *TaskPrivatesMap = llvm::Function::Create(
3788       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3789       &CGM.getModule());
3790   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3791                                     TaskPrivatesMapFnInfo);
3792   if (CGM.getLangOpts().Optimize) {
3793     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3794     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3795     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3796   }
3797   CodeGenFunction CGF(CGM);
3798   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3799                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3800 
3801   // *privi = &.privates.privi;
3802   LValue Base = CGF.EmitLoadOfPointerLValue(
3803       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3804       TaskPrivatesArg.getType()->castAs<PointerType>());
3805   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3806   Counter = 0;
3807   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3808     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3809     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3810     LValue RefLVal =
3811         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3812     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3813         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3814     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3815     ++Counter;
3816   }
3817   CGF.FinishFunction();
3818   return TaskPrivatesMap;
3819 }
3820 
/// Emit initialization for private variables in task-based directives.
///
/// \param KmpTaskSharedsPtr Address of the source shareds block (may be
///        invalid when no copy-from-shareds is needed).
/// \param TDBase LValue of the kmp_task_t_with_privates object being filled.
/// \param ForDup True when emitting the body of the task duplication
///        function (taskloops); changes both the source of the data and
///        which initializers are emitted.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the fields of the privates record in lockstep with the Privates
  // descriptors.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the duplication function only non-trivial constructor-based
    // initializers are re-emitted; everything else was handled at creation.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Source data lives in the shareds block of the source task.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Variable captured by a lambda or a block: use the captured ref.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: bind the element-init variable to the
          // shared value, then run the initializer expression.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: emit the copy's own initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3942 
3943 /// Check if duplication function is required for taskloops.
3944 static bool checkInitIsRequired(CodeGenFunction &CGF,
3945                                 ArrayRef<PrivateDataTy> Privates) {
3946   bool InitRequired = false;
3947   for (const PrivateDataTy &Pair : Privates) {
3948     if (Pair.second.isLocalPrivate())
3949       continue;
3950     const VarDecl *VD = Pair.second.PrivateCopy;
3951     const Expr *Init = VD->getAnyInitializer();
3952     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3953                                     !CGF.isTrivialInitializer(Init));
3954     if (InitRequired)
3955       break;
3956   }
3957   return InitRequired;
3958 }
3959 
3960 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Prototype: void (kmp_task_t_with_privates *dst,
  //                  kmp_task_t_with_privates *src, int lastpriv).
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // Dereference the destination task pointer.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivates are copied from the *source* task's shareds block; load
    // task_src->task_data.shareds. (This TDBase intentionally shadows the
    // destination one above.)
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  // Re-run the private initializers into the destination task.
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4039 
4040 /// Checks if destructor function is required to be generated.
4041 /// \return true if cleanups are required, false otherwise.
4042 static bool
4043 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4044                          ArrayRef<PrivateDataTy> Privates) {
4045   for (const PrivateDataTy &P : Privates) {
4046     if (P.second.isLocalPrivate())
4047       continue;
4048     QualType Ty = P.second.Original->getType().getNonReferenceType();
4049     if (Ty.isDestructedType())
4050       return true;
4051   }
4052   return false;
4053 }
4054 
4055 namespace {
/// Loop generator for OpenMP iterator expression.
///
/// The constructor privatizes each iterator variable (and its helper
/// counter), then opens one counter-driven loop level per iterator; the
/// destructor closes the levels in reverse order by emitting the counter
/// increment, the back-edge and the exit block. Any code emitted between
/// construction and destruction therefore executes once per element of the
/// iteration space described by the iterator expression. A null expression
/// makes the scope a no-op.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator "continue" destinations: branch target of the back-edge
  // emitted by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  // Per-iterator loop-exit destinations.
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Evaluate every iterator's upper bound up front and privatize the
    // iterator variable plus its helper counter.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Open one loop level per iterator; only the loop headers and bodies are
    // emitted here — the latches and exits are emitted by the destructor.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Use a signed or unsigned compare to match the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loop nest from the innermost level outwards.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
4132 } // namespace
4133 
4134 static std::pair<llvm::Value *, llvm::Value *>
4135 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4136   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4137   llvm::Value *Addr;
4138   if (OASE) {
4139     const Expr *Base = OASE->getBase();
4140     Addr = CGF.EmitScalarExpr(Base);
4141   } else {
4142     Addr = CGF.EmitLValue(E).getPointer(CGF);
4143   }
4144   llvm::Value *SizeVal;
4145   QualType Ty = E->getType();
4146   if (OASE) {
4147     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4148     for (const Expr *SE : OASE->getDimensions()) {
4149       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4150       Sz = CGF.EmitScalarConversion(
4151           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4152       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4153     }
4154   } else if (const auto *ASE =
4155                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4156     LValue UpAddrLVal =
4157         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4158     llvm::Value *UpAddr =
4159         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4160     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4161     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4162     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4163   } else {
4164     SizeVal = CGF.getTypeSize(Ty);
4165   }
4166   return std::make_pair(Addr, SizeVal);
4167 }
4168 
4169 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4170 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4171   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4172   if (KmpTaskAffinityInfoTy.isNull()) {
4173     RecordDecl *KmpAffinityInfoRD =
4174         C.buildImplicitRecord("kmp_task_affinity_info_t");
4175     KmpAffinityInfoRD->startDefinition();
4176     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4177     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4178     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4179     KmpAffinityInfoRD->completeDefinition();
4180     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4181   }
4182 }
4183 
/// Allocates a runtime task object for a task/taskloop/target directive and
/// initializes its fields: privates record, shareds copy, detach event,
/// affinity registration, destructor thunk and priority.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally record the declaration used to emit the
  // element initializer.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Implicit locals: allocatable decls use pointer alignment (they are
  // represented via a pointer), others use their natural alignment.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Order privates with the largest alignment first to reduce padding in the
  // generated privates record.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // different (cached) record type than plain task/target directives.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map type is taken from the 4th parameter of the outlined
  // task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null mapping function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The 'final' clause may be a runtime expression (select between FinalFlag
  // and 0) or a compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // With a 'nowait' clause, allocate via __kmpc_omp_target_task_alloc,
    // which takes an additional device ID argument.
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        // Iterator modifier: element count is the product of all iterator
        // upper bounds, known only at runtime.
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Dynamic element count: emit a variable-length array sized
      // NumAffinities + <product of iterator spaces>.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time constant element count: use a fixed-size array.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // A runtime position counter is only needed once iterator-driven entries
    // come into play; it starts right after the statically filled ones.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops that need per-copy (re)initialization also get a task-dup
    // helper so the runtime can clone the task descriptor.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4573 
namespace {
/// Dependence kind for RTL.
// NOTE(review): the numeric values presumably match the dependence flag bits
// expected by the OpenMP runtime (openmp/runtime/src/kmp.h) — confirm before
// changing them.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4584 
4585 /// Translates internal dependency kind into the runtime kind.
4586 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4587   RTLDependenceKindTy DepKind;
4588   switch (K) {
4589   case OMPC_DEPEND_in:
4590     DepKind = DepIn;
4591     break;
4592   // Out and InOut dependencies must use the same code.
4593   case OMPC_DEPEND_out:
4594   case OMPC_DEPEND_inout:
4595     DepKind = DepInOut;
4596     break;
4597   case OMPC_DEPEND_mutexinoutset:
4598     DepKind = DepMutexInOutSet;
4599     break;
4600   case OMPC_DEPEND_source:
4601   case OMPC_DEPEND_sink:
4602   case OMPC_DEPEND_depobj:
4603   case OMPC_DEPEND_unknown:
4604     llvm_unreachable("Unknown task dependence type");
4605   }
4606   return DepKind;
4607 }
4608 
4609 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4610 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4611                            QualType &FlagsTy) {
4612   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4613   if (KmpDependInfoTy.isNull()) {
4614     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4615     KmpDependInfoRD->startDefinition();
4616     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4617     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4618     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4619     KmpDependInfoRD->completeDefinition();
4620     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4621   }
4622 }
4623 
/// Returns the number of dependencies stored in a depobj variable together
/// with an lvalue for the first kmp_depend_info element of its array.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable itself holds a void* that points at the dependence
  // array; load it and re-type it as kmp_depend_info*.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step one element back: the slot immediately before the array is a
  // bookkeeping kmp_depend_info whose base_addr field holds the element
  // count.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4652 
/// Fills \p DependenciesArray with one kmp_depend_info record (base_addr,
/// len, flags) per dependence expression in \p Data. \p Pos is either a
/// compile-time index (unsigned*) or a runtime counter lvalue (LValue*);
/// it is advanced past the emitted entries. Iterator modifiers, if present,
/// wrap the emission in the corresponding loop nest.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // When an iterator modifier is present, everything below is emitted inside
  // the generated loop nest (once per iteration-space element).
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    // Address the destination slot either by constant index or by the
    // runtime counter.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: bump either the static index or the runtime
    // counter in memory.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4711 
/// For each depobj dependency expression in \p Data, compute the number of
/// kmp_depend_info elements stored in that depobj, as runtime values.
///
/// The element count of a depobj array lives in the base_addr field of the
/// record immediately preceding the array (see emitDepobjDependClause, which
/// stores it there). The counts are first written to stack temporaries inside
/// the (optional) iterator scope and re-loaded after the scope is closed, so
/// the returned values remain usable outside the iterator loop nest.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // Load the pointer stored in the depobj variable and reinterpret it as
      // a pointer to kmp_depend_info.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // Step back one element to the record that holds the element count.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Stash the count in a zero-initialized temporary so it can be
      // re-loaded once the iterator scope has ended.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Re-load the accumulated counts outside the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4769 
/// Copy the kmp_depend_info records of every depobj dependency in \p Data
/// into \p DependenciesArray at the runtime position held by \p PosLVal,
/// advancing the position by the number of elements copied.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the pointer stored in the depobj variable and reinterpret it as
      // a pointer to kmp_depend_info.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj. It is stored in the
      // base_addr field of the record preceding the array (see
      // emitDepobjDependClause).
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data: NumDeps records of ElSize bytes each.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos by the number of elements copied (not bytes).
      // pos += numDeps;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4830 
4831 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4832     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4833     SourceLocation Loc) {
4834   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4835         return D.DepExprs.empty();
4836       }))
4837     return std::make_pair(nullptr, Address::invalid());
4838   // Process list of dependencies.
4839   ASTContext &C = CGM.getContext();
4840   Address DependenciesArray = Address::invalid();
4841   llvm::Value *NumOfElements = nullptr;
4842   unsigned NumDependencies = std::accumulate(
4843       Dependencies.begin(), Dependencies.end(), 0,
4844       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4845         return D.DepKind == OMPC_DEPEND_depobj
4846                    ? V
4847                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4848       });
4849   QualType FlagsTy;
4850   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4851   bool HasDepobjDeps = false;
4852   bool HasRegularWithIterators = false;
4853   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4854   llvm::Value *NumOfRegularWithIterators =
4855       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4856   // Calculate number of depobj dependecies and regular deps with the iterators.
4857   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4858     if (D.DepKind == OMPC_DEPEND_depobj) {
4859       SmallVector<llvm::Value *, 4> Sizes =
4860           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4861       for (llvm::Value *Size : Sizes) {
4862         NumOfDepobjElements =
4863             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4864       }
4865       HasDepobjDeps = true;
4866       continue;
4867     }
4868     // Include number of iterations, if any.
4869     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4870       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4871         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4872         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4873         NumOfRegularWithIterators =
4874             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4875       }
4876       HasRegularWithIterators = true;
4877       continue;
4878     }
4879   }
4880 
4881   QualType KmpDependInfoArrayTy;
4882   if (HasDepobjDeps || HasRegularWithIterators) {
4883     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4884                                            /*isSigned=*/false);
4885     if (HasDepobjDeps) {
4886       NumOfElements =
4887           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4888     }
4889     if (HasRegularWithIterators) {
4890       NumOfElements =
4891           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4892     }
4893     OpaqueValueExpr OVE(Loc,
4894                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4895                         VK_PRValue);
4896     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4897                                                   RValue::get(NumOfElements));
4898     KmpDependInfoArrayTy =
4899         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4900                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4901     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4902     // Properly emit variable-sized array.
4903     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4904                                          ImplicitParamDecl::Other);
4905     CGF.EmitVarDecl(*PD);
4906     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4907     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4908                                               /*isSigned=*/false);
4909   } else {
4910     KmpDependInfoArrayTy = C.getConstantArrayType(
4911         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4912         ArrayType::Normal, /*IndexTypeQuals=*/0);
4913     DependenciesArray =
4914         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4915     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4916     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4917                                            /*isSigned=*/false);
4918   }
4919   unsigned Pos = 0;
4920   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4921     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4922         Dependencies[I].IteratorExpr)
4923       continue;
4924     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4925                    DependenciesArray);
4926   }
4927   // Copy regular dependecies with iterators.
4928   LValue PosLVal = CGF.MakeAddrLValue(
4929       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4930   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4931   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4932     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4933         !Dependencies[I].IteratorExpr)
4934       continue;
4935     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4936                    DependenciesArray);
4937   }
4938   // Copy final depobj arrays without iterators.
4939   if (HasDepobjDeps) {
4940     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4941       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4942         continue;
4943       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4944                          DependenciesArray);
4945     }
4946   }
4947   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4948       DependenciesArray, CGF.VoidPtrTy);
4949   return std::make_pair(NumOfElements, DependenciesArray);
4950 }
4951 
/// Emit allocation and initialization of the dependency array backing an
/// 'omp depobj' construct, returning the address of its first real element.
///
/// The array is heap-allocated via __kmpc_alloc with one extra leading
/// kmp_depend_info element whose base_addr field records the number of
/// dependencies; emitDepobjElementsSizes/emitDepobjElements/emitUpdateClause
/// read that count back later. The returned address points past the leading
/// element.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Element count is a runtime product of the iterator ranges.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the leading count element; scale by the record size.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Element count is known at compile time.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill the records starting at index 1: with an iterator, a runtime
  // position counter is needed; otherwise a static index suffices.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a pointer past the leading count element, cast to void*.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5034 
/// Emit code for the 'destroy' clause of an 'omp depobj' directive: free the
/// heap-allocated kmp_depend_info array backing the depobj.
/// \param DepobjLVal LValue of the depobj variable; the stored pointer points
/// one element past the hidden count record, so the code steps back one
/// element to reach the start of the allocation before freeing it.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  // Load the pointer stored in the depobj variable.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // Step back to the real start of the allocation (the count element, see
  // emitDepobjDependClause).
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
5061 
/// Emit code for the 'update' clause of an 'omp depobj' directive: loop over
/// every kmp_depend_info element of the depobj array and overwrite its flags
/// field with the new dependence kind \p NewDepKind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Compute the past-the-end pointer: Begin + NumDeps elements.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the current element pointer: Begin on entry, the advanced
  // pointer on the back edge.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5107 
/// Emit a call for an OpenMP task construct: allocate and initialize the
/// task, then either enqueue it (__kmpc_omp_task[_with_deps]) or, when the
/// if-clause evaluates to false, execute it immediately and serially in the
/// encountering thread (between __kmpc_omp_task_begin_if0/_complete_if0,
/// after waiting on any dependences).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Then-branch of the if-clause (or unconditional path): enqueue the task.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Reset part_id to 0 for untied tasks.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // Else-branch of the if-clause: run the task body serially, in-place.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5225 
/// Emit a call for an OpenMP taskloop construct: allocate/initialize the task
/// via emitTaskInit, fill in the loop bounds, stride and reduction fields of
/// the task record, and invoke __kmpc_taskloop.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // Unlike regular tasks, the if-clause is passed to the runtime as an int
  // argument instead of branching in the generated code.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound field of the task record.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the upper bound field.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the stride field.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Schedule kinds accepted by __kmpc_taskloop's 'sched' argument.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5311 
5312 /// Emit reduction operation for each element of array (required for
5313 /// array sections) LHS op = RHS.
5314 /// \param Type Type of array.
5315 /// \param LHSVar Variable on the left side of the reduction operation
5316 /// (references element of array in original variable).
5317 /// \param RHSVar Variable on the right side of the reduction operation
5318 /// (references element of array in original variable).
5319 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5320 /// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely when the array has no elements.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes track the current source/destination element across loop
  // iterations; the back-edge incoming values are added below once the
  // next-element pointers have been computed.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so that
  // RedOpGen (which refers to the variables) operates on single elements.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5391 
5392 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5393 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5394 /// UDR combiner function.
5395 static void emitReductionCombiner(CodeGenFunction &CGF,
5396                                   const Expr *ReductionOp) {
5397   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5398     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5399       if (const auto *DRE =
5400               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5401         if (const auto *DRD =
5402                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5403           std::pair<llvm::Function *, llvm::Function *> Reduction =
5404               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5405           RValue Func = RValue::get(Reduction.first);
5406           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5407           CGF.EmitIgnoredExpr(ReductionOp);
5408           return;
5409         }
5410   CGF.EmitIgnoredExpr(ReductionOp);
5411 }
5412 
/// Emits the reduction function:
/// \code
/// void reduction_func(void *LHSArg, void *RHSArg) {
///   ...
///   *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
///   ...
/// }
/// \endcode
/// LHSArg/RHSArg point to arrays of pointers to the reduction items; items
/// with variably modified type are followed by an extra slot carrying their
/// element count.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap the LHS/RHS variables referenced by the reduction ops onto the
  // corresponding slots of the argument arrays.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The following array slot holds the element count (an integer stored
      // through an inttoptr as void*; see the matching ptrtoint below).
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5504 
5505 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5506                                                   const Expr *ReductionOp,
5507                                                   const Expr *PrivateRef,
5508                                                   const DeclRefExpr *LHS,
5509                                                   const DeclRefExpr *RHS) {
5510   if (PrivateRef->getType()->isArrayType()) {
5511     // Emit reduction for array section.
5512     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5513     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5514     EmitOMPAggregateReduction(
5515         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5516         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5517           emitReductionCombiner(CGF, ReductionOp);
5518         });
5519   } else {
5520     // Emit reduction for array subscript or single variable.
5521     emitReductionCombiner(CGF, ReductionOp);
5522   }
5523 }
5524 
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  // Simple reduction: no runtime coordination needed, just apply each
  // combiner directly.
  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The element count is smuggled through the void* slot via inttoptr;
      // emitReductionFunction reads it back with ptrtoint.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Body of case 1: non-atomic combination of each private copy into the
  // corresponding original (LHS) variable.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Body of case 2: try to recognize each reduction op as an
  // "<x> = <x> op <expr>" assignment so it can be emitted as a simple atomic
  // update; anything that does not match falls back to a critical region.
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // NOTE: the inner 'BO' below shadows this opcode variable; the outer
      // one keeps the opcode extracted from the RHS of the assignment.
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // For the non-lock-free path: materialize the loaded value of
                // X in a temporary and remap VD to it while emitting UpExpr.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5831 
5832 /// Generates unique name for artificial threadprivate variables.
5833 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5834 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5835                                       const Expr *Ref) {
5836   SmallString<256> Buffer;
5837   llvm::raw_svector_ostream Out(Buffer);
5838   const clang::DeclRefExpr *DE;
5839   const VarDecl *D = ::getBaseDecl(Ref, DE);
5840   if (!D)
5841     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5842   D = D->getCanonicalDecl();
5843   std::string Name = CGM.getOpenMPRuntime().getName(
5844       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5845   Out << Prefix << Name << "_"
5846       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5847   return std::string(Out.str());
5848 }
5849 
5850 /// Emits reduction initializer function:
5851 /// \code
5852 /// void @.red_init(void* %arg, void* %orig) {
5853 /// %0 = bitcast void* %arg to <type>*
5854 /// store <type> <init>, <type>* %0
5855 /// ret void
5856 /// }
5857 /// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are restrict-qualified void*.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg is a pointer to the private copy of reduction item N.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by the
  // reduction initializer); otherwise pass a null pointer.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5918 
5919 /// Emits reduction combiner function:
5920 /// \code
5921 /// void @.red_comb(void* %arg0, void* %arg1) {
5922 /// %lhs = bitcast void* %arg0 to <type>*
5923 /// %rhs = bitcast void* %arg1 to <type>*
5924 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5925 /// store <type> %2, <type>* %lhs
5926 /// ret void
5927 /// }
5928 /// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // Variables referenced by the combiner expression; remapped below onto the
  // function arguments.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5996 
5997 /// Emits reduction finalizer function:
5998 /// \code
5999 /// void @.red_fini(void* %arg) {
6000 /// %0 = bitcast void* %arg to <type>*
6001 /// <destroy>(<type>* %0)
6002 /// ret void
6003 /// }
6004 /// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is needed (and none is emitted) for items without cleanups.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg is a pointer to the private copy to be destroyed.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6045 
/// Emits the task-reduction initialization for \a Data: materializes an array
/// of kmp_taskred_input_t descriptors (one per reduction variable) and passes
/// it to __kmpc_taskred_modifier_init or __kmpc_taskred_init. Returns the
/// taskgroup/reduction handle produced by the runtime call, or nullptr if
/// there is no insert point or no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // Finalizer may be omitted (nullptr) when the item needs no cleanups.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    // Flag value 1 signals delayed creation (VLA/array-section items); 0
    // otherwise.
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6174 
/// Emits the finalization call for a task reduction with a 'task' modifier.
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
6192 
/// Stores the dynamic size of reduction item \a N into its artificial
/// threadprivate variable so the init/comb/fini helper functions can read it.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    // Same unique name scheme as used by the reduction init/fini emitters, so
    // they load the value stored here.
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}
6209 
/// Returns the address of the per-thread copy of a task reduction item by
/// calling __kmpc_task_reduction_get_th_data, preserving the alignment of the
/// shared item.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}
6229 
6230 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6231                                        SourceLocation Loc) {
6232   if (!CGF.HaveInsertPoint())
6233     return;
6234 
6235   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6236     OMPBuilder.createTaskwait(CGF.Builder);
6237   } else {
6238     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6239     // global_tid);
6240     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6241     // Ignore return result until untied tasks are supported.
6242     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6243                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6244                         Args);
6245   }
6246 
6247   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6248     Region->emitUntiedSwitch(CGF);
6249 }
6250 
6251 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6252                                            OpenMPDirectiveKind InnerKind,
6253                                            const RegionCodeGenTy &CodeGen,
6254                                            bool HasCancel) {
6255   if (!CGF.HaveInsertPoint())
6256     return;
6257   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6258                                  InnerKind != OMPD_critical &&
6259                                      InnerKind != OMPD_master &&
6260                                      InnerKind != OMPD_masked);
6261   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6262 }
6263 
namespace {
/// Cancellation kind values passed as the `cncl_kind` argument to the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime calls.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
6273 
6274 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6275   RTCancelKind CancelKind = CancelNoreq;
6276   if (CancelRegion == OMPD_parallel)
6277     CancelKind = CancelParallel;
6278   else if (CancelRegion == OMPD_for)
6279     CancelKind = CancelLoop;
6280   else if (CancelRegion == OMPD_sections)
6281     CancelKind = CancelSections;
6282   else {
6283     assert(CancelRegion == OMPD_taskgroup);
6284     CancelKind = CancelTaskgroup;
6285   }
6286   return CancelKind;
6287 }
6288 
/// Emits a '#pragma omp cancellation point' as a __kmpc_cancellationpoint call
/// plus the conditional branch that exits the construct when cancellation was
/// requested.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // Parallel cancellation synchronizes through a cancel barrier first.
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6328 
/// Emits a '#pragma omp cancel' as a __kmpc_cancel call plus the conditional
/// exit from the construct; honors an optional 'if' clause condition.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // ThenGen emits the actual cancel; it may run under an 'if' clause guard.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // Parallel cancellation synchronizes through a cancel barrier first.
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Guard the cancel with the 'if' clause condition; else-branch is a
      // no-op.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6374 
namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  /// (allocator, allocator-traits) expression pairs collected from the
  /// uses_allocators clauses.
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  /// On region entry, initialize each allocator via
  /// emitUsesAllocatorsInit.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  /// On region exit, destroy each allocator via emitUsesAllocatorsFini.
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace
6402 
6403 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6404     const OMPExecutableDirective &D, StringRef ParentName,
6405     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6406     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6407   assert(!ParentName.empty() && "Invalid target region parent name!");
6408   HasEmittedTargetRegion = true;
6409   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6410   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6411     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6412       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6413       if (!D.AllocatorTraits)
6414         continue;
6415       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6416     }
6417   }
6418   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6419   CodeGen.setAction(UsesAllocatorAction);
6420   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6421                                    IsOffloadEntry, CodeGen);
6422 }
6423 
/// Emits initialization of a single uses_allocators allocator: calls
/// __kmpc_init_allocator with the traits array and stores the returned handle
/// into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits is the (constant) element count of the traits array.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  // Reinterpret the traits array address as void** and load the pointer to
  // pass to the runtime.
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  // Convert the runtime's void* result to the declared allocator type before
  // storing.
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6458 
/// Emits destruction of a single uses_allocators allocator: loads the
/// allocator handle and passes it to __kmpc_destroy_allocator.
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  // Convert the declared allocator type back to void* for the runtime call.
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}
6474 
/// Generates the outlined function and region ID for a target region, and
/// registers it as an offload entry when requested.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    // AMDGCN entry points must use the kernel calling convention.
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // On the host, the region ID is a unique dummy global.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6543 
6544 /// Checks if the expression is constant or does not have non-trivial function
6545 /// calls.
6546 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6547   // We can skip constant expressions.
6548   // We can skip expressions with trivial calls or simple expressions.
6549   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6550           !E->hasNonTrivialCall(Ctx)) &&
6551          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6552 }
6553 
/// Returns the single non-trivial child statement of \a Body, looking through
/// nested compound statements and ignoring trivial expressions, no-op
/// statements, and ignorable declarations. Returns nullptr when more than one
/// meaningful child is found.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Descend through compound statements; at each level keep at most one
  // non-ignorable child.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // The whole DeclStmt is ignorable only if every declaration in it is.
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Globals and unused locals don't affect the region body.
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6595 
6596 /// Emit the number of teams for a target directive.  Inspect the num_teams
6597 /// clause associated with a teams construct combined or closely nested
6598 /// with the target directive.
6599 ///
6600 /// Emit a team of size one for directives such as 'target parallel' that
6601 /// have no associated teams construct.
6602 ///
6603 /// Otherwise, return nullptr.
6604 static llvm::Value *
6605 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6606                                const OMPExecutableDirective &D) {
6607   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6608          "Clauses associated with the teams directive expected to be emitted "
6609          "only for the host!");
6610   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6611   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6612          "Expected target-based executable directive.");
6613   CGBuilderTy &Bld = CGF.Builder;
6614   switch (DirectiveKind) {
6615   case OMPD_target: {
6616     const auto *CS = D.getInnermostCapturedStmt();
6617     const auto *Body =
6618         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6619     const Stmt *ChildStmt =
6620         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6621     if (const auto *NestedDir =
6622             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6623       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6624         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6625           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6626           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6627           const Expr *NumTeams =
6628               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6629           llvm::Value *NumTeamsVal =
6630               CGF.EmitScalarExpr(NumTeams,
6631                                  /*IgnoreResultAssign*/ true);
6632           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6633                                    /*isSigned=*/true);
6634         }
6635         return Bld.getInt32(0);
6636       }
6637       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6638           isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6639         return Bld.getInt32(1);
6640       return Bld.getInt32(0);
6641     }
6642     return nullptr;
6643   }
6644   case OMPD_target_teams:
6645   case OMPD_target_teams_distribute:
6646   case OMPD_target_teams_distribute_simd:
6647   case OMPD_target_teams_distribute_parallel_for:
6648   case OMPD_target_teams_distribute_parallel_for_simd: {
6649     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6650       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6651       const Expr *NumTeams =
6652           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6653       llvm::Value *NumTeamsVal =
6654           CGF.EmitScalarExpr(NumTeams,
6655                              /*IgnoreResultAssign*/ true);
6656       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6657                                /*isSigned=*/true);
6658     }
6659     return Bld.getInt32(0);
6660   }
6661   case OMPD_target_parallel:
6662   case OMPD_target_parallel_for:
6663   case OMPD_target_parallel_for_simd:
6664   case OMPD_target_simd:
6665     return Bld.getInt32(1);
6666   case OMPD_parallel:
6667   case OMPD_for:
6668   case OMPD_parallel_for:
6669   case OMPD_parallel_master:
6670   case OMPD_parallel_sections:
6671   case OMPD_for_simd:
6672   case OMPD_parallel_for_simd:
6673   case OMPD_cancel:
6674   case OMPD_cancellation_point:
6675   case OMPD_ordered:
6676   case OMPD_threadprivate:
6677   case OMPD_allocate:
6678   case OMPD_task:
6679   case OMPD_simd:
6680   case OMPD_tile:
6681   case OMPD_unroll:
6682   case OMPD_sections:
6683   case OMPD_section:
6684   case OMPD_single:
6685   case OMPD_master:
6686   case OMPD_critical:
6687   case OMPD_taskyield:
6688   case OMPD_barrier:
6689   case OMPD_taskwait:
6690   case OMPD_taskgroup:
6691   case OMPD_atomic:
6692   case OMPD_flush:
6693   case OMPD_depobj:
6694   case OMPD_scan:
6695   case OMPD_teams:
6696   case OMPD_target_data:
6697   case OMPD_target_exit_data:
6698   case OMPD_target_enter_data:
6699   case OMPD_distribute:
6700   case OMPD_distribute_simd:
6701   case OMPD_distribute_parallel_for:
6702   case OMPD_distribute_parallel_for_simd:
6703   case OMPD_teams_distribute:
6704   case OMPD_teams_distribute_simd:
6705   case OMPD_teams_distribute_parallel_for:
6706   case OMPD_teams_distribute_parallel_for_simd:
6707   case OMPD_target_update:
6708   case OMPD_declare_simd:
6709   case OMPD_declare_variant:
6710   case OMPD_begin_declare_variant:
6711   case OMPD_end_declare_variant:
6712   case OMPD_declare_target:
6713   case OMPD_end_declare_target:
6714   case OMPD_declare_reduction:
6715   case OMPD_declare_mapper:
6716   case OMPD_taskloop:
6717   case OMPD_taskloop_simd:
6718   case OMPD_master_taskloop:
6719   case OMPD_master_taskloop_simd:
6720   case OMPD_parallel_master_taskloop:
6721   case OMPD_parallel_master_taskloop_simd:
6722   case OMPD_requires:
6723   case OMPD_unknown:
6724     break;
6725   default:
6726     break;
6727   }
6728   llvm_unreachable("Unexpected directive kind.");
6729 }
6730 
/// Compute the number of threads implied by a 'parallel' or 'simd' directive
/// nested directly inside the captured statement \a CS of a target region.
///
/// Returns:
///  - for a nested 'parallel' directive: a 32-bit value combining its 'if'
///    and 'num_threads' clauses, clamped by \a DefaultThreadLimitVal when
///    that is non-null;
///  - for a nested 'simd' directive: the constant 1;
///  - otherwise: \a DefaultThreadLimitVal if non-null, else the constant 0
///    (0 meaning "use the runtime default").
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the first 'if' clause that applies to the parallel region
        // (either unmodified or explicitly 'if(parallel: ...)').
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Condition folds to a constant: a false condition means the
            // region runs with exactly one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit the clause's captured pre-init declarations so the
            // condition expression can be evaluated here.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit the clause's captured pre-init declarations before evaluating
        // the num_threads expression.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp by the default thread limit:
        // min(DefaultThreadLimitVal, NumThreads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6822 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// The returned value, when non-null, is a 32-bit integer; the constant 0
/// means "let the runtime choose".  Must only be called on the host (see
/// assertion below) for target-based executable directives.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': the interesting clauses, if any, live on a directive
    // nested inside the captured region.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested directive may carry a thread_limit clause; evaluate it in
      // the context of the captured statement.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit the clause's captured pre-init declarations first.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a nested non-distribute teams directive, descend one more level
      // to look for a parallel/distribute directive inside it.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Look through a nested plain 'distribute' for a parallel region.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined directives: the if/thread_limit/num_threads clauses sit on the
    // directive itself.
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant false condition: one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine: min(NumThreadsVal, ThreadLimitVal) when both are present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7049 
7050 namespace {
7051 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7052 
7053 // Utility to handle information from clauses associated with a given
7054 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7055 // It provides a convenient interface to obtain the information and generate
7056 // code for that information.
7057 class MappableExprsHandler {
7058 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  ///
  /// NOTE(review): these bit values are handed to the offload runtime, so
  /// they presumably must stay in sync with the runtime's map-type flag
  /// definitions — verify against the runtime headers before changing any.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7106 
7107   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7108   static unsigned getFlagMemberOffset() {
7109     unsigned Offset = 0;
7110     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7111          Remain = Remain >> 1)
7112       Offset++;
7113     return Offset;
7114   }
7115 
7116   /// Class that holds debugging information for a data mapping to be passed to
7117   /// the runtime library.
7118   class MappingExprInfo {
7119     /// The variable declaration used for the data mapping.
7120     const ValueDecl *MapDecl = nullptr;
7121     /// The original expression used in the map clause, or null if there is
7122     /// none.
7123     const Expr *MapExpr = nullptr;
7124 
7125   public:
7126     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7127         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7128 
7129     const ValueDecl *getMapDecl() const { return MapDecl; }
7130     const Expr *getMapExpr() const { return MapExpr; }
7131   };
7132 
7133   /// Class that associates information with a base pointer to be passed to the
7134   /// runtime library.
7135   class BasePointerInfo {
7136     /// The base pointer.
7137     llvm::Value *Ptr = nullptr;
7138     /// The base declaration that refers to this device pointer, or null if
7139     /// there is none.
7140     const ValueDecl *DevPtrDecl = nullptr;
7141 
7142   public:
7143     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7144         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7145     llvm::Value *operator*() const { return Ptr; }
7146     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7147     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7148   };
7149 
  // Convenience aliases for the parallel arrays of map information that are
  // collected per mappable clause and eventually handed to the runtime.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7157 
7158   /// This structure contains combined information generated for mappable
7159   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7160   /// mappers, and non-contiguous information.
7161   struct MapCombinedInfoTy {
7162     struct StructNonContiguousInfo {
7163       bool IsNonContiguous = false;
7164       MapDimArrayTy Dims;
7165       MapNonContiguousArrayTy Offsets;
7166       MapNonContiguousArrayTy Counts;
7167       MapNonContiguousArrayTy Strides;
7168     };
7169     MapExprsArrayTy Exprs;
7170     MapBaseValuesArrayTy BasePointers;
7171     MapValuesArrayTy Pointers;
7172     MapValuesArrayTy Sizes;
7173     MapFlagsArrayTy Types;
7174     MapMappersArrayTy Mappers;
7175     StructNonContiguousInfo NonContigInfo;
7176 
7177     /// Append arrays in \a CurInfo.
7178     void append(MapCombinedInfoTy &CurInfo) {
7179       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7180       BasePointers.append(CurInfo.BasePointers.begin(),
7181                           CurInfo.BasePointers.end());
7182       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7183       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7184       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7185       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7186       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7187                                  CurInfo.NonContigInfo.Dims.end());
7188       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7189                                     CurInfo.NonContigInfo.Offsets.end());
7190       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7191                                    CurInfo.NonContigInfo.Counts.end());
7192       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7193                                     CurInfo.NonContigInfo.Strides.end());
7194     }
7195   };
7196 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Map entries gathered for the struct before the final range is emitted.
    MapCombinedInfoTy PreliminaryMapData;
    // Field index and address of the lowest mapped element.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Field index and address of the highest mapped element.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Base address of the struct.
    Address Base = Address::invalid();
    // Lower bound address of the mapped range.
    Address LB = Address::invalid();
    // True if one of the mapped elements is an array section.
    bool IsArraySection = false;
    // True if the whole record has been mapped.
    bool HasCompleteRecord = false;
  };
7212 
7213 private:
  /// All the information extracted for one component list of a map-like
  /// clause that is needed to generate its map entries, including whether a
  /// device pointer has to be returned for it.
  struct MapInfo {
    // The mappable-expression component list from the clause.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    // Map type (to/from/tofrom/alloc/release/delete).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    // Map-type modifiers (e.g. always, close, present).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    // Motion modifiers for to/from clauses on 'target update'.
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    // True if the runtime must return the device pointer for this entry.
    bool ReturnDevicePointer = false;
    // True if the mapping was generated implicitly rather than written.
    bool IsImplicit = false;
    // User-defined mapper attached to this mapping, if any.
    const ValueDecl *Mapper = nullptr;
    // Original variable-reference expression, if any.
    const Expr *VarRef = nullptr;
    // True when the entry comes from a use_device_addr clause.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7240 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // Expression that identifies the deferred entry.
    const Expr *IE = nullptr;
    // Declaration the use_device_ptr/use_device_addr clause names.
    const ValueDecl *VD = nullptr;
    // True for use_device_addr, false for use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7253 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either a executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7274 
  /// Compute the size in bytes, as an llvm::Value, of the storage referenced
  /// by expression \a E for mapping purposes.
  ///
  /// Special cases, in order:
  ///  - array shaping expressions: pointee size times the product of the
  ///    emitted dimension sizes;
  ///  - reference types: size of the referenced (pointee) type;
  ///  - array sections: section length times element size, where the [lb:]
  ///    form is computed as max(sizeof(base) - lb * elemsize, 0);
  ///  - otherwise: the static size of the expression's type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Guard against lb past the end of the base: clamp the result at 0
      // instead of producing a wrapped-around (huge unsigned) size.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7349 
7350   /// Return the corresponding bits for a given map clause modifier. Add
7351   /// a flag marking the map as a pointer if requested. Add a flag marking the
7352   /// map as the first one of a series of maps that relate to the same map
7353   /// expression.
7354   OpenMPOffloadMappingFlags getMapTypeBits(
7355       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7356       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7357       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7358     OpenMPOffloadMappingFlags Bits =
7359         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7360     switch (MapType) {
7361     case OMPC_MAP_alloc:
7362     case OMPC_MAP_release:
7363       // alloc and release is the default behavior in the runtime library,  i.e.
7364       // if we don't pass any bits alloc/release that is what the runtime is
7365       // going to do. Therefore, we don't need to signal anything for these two
7366       // type modifiers.
7367       break;
7368     case OMPC_MAP_to:
7369       Bits |= OMP_MAP_TO;
7370       break;
7371     case OMPC_MAP_from:
7372       Bits |= OMP_MAP_FROM;
7373       break;
7374     case OMPC_MAP_tofrom:
7375       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7376       break;
7377     case OMPC_MAP_delete:
7378       Bits |= OMP_MAP_DELETE;
7379       break;
7380     case OMPC_MAP_unknown:
7381       llvm_unreachable("Unexpected map type!");
7382     }
7383     if (AddPtrFlag)
7384       Bits |= OMP_MAP_PTR_AND_OBJ;
7385     if (AddIsTargetParamFlag)
7386       Bits |= OMP_MAP_TARGET_PARAM;
7387     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7388         != MapModifiers.end())
7389       Bits |= OMP_MAP_ALWAYS;
7390     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7391         != MapModifiers.end())
7392       Bits |= OMP_MAP_CLOSE;
7393     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) !=
7394             MapModifiers.end() ||
7395         llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) !=
7396             MotionModifiers.end())
7397       Bits |= OMP_MAP_PRESENT;
7398     if (IsNonContiguous)
7399       Bits |= OMP_MAP_NON_CONTIG;
7400     return Bits;
7401   }
7402 
7403   /// Return true if the provided expression is a final array section. A
7404   /// final array section, is one whose length can't be proved to be one.
7405   bool isFinalArraySectionExpression(const Expr *E) const {
7406     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7407 
7408     // It is not an array section and therefore not a unity-size one.
7409     if (!OASE)
7410       return false;
7411 
7412     // An array section with no colon always refer to a single element.
7413     if (OASE->getColonLocFirst().isInvalid())
7414       return false;
7415 
7416     const Expr *Length = OASE->getLength();
7417 
7418     // If we don't have a length we have to check if the array has size 1
7419     // for this dimension. Also, we should always expect a length if the
7420     // base type is pointer.
7421     if (!Length) {
7422       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7423                              OASE->getBase()->IgnoreParenImpCasts())
7424                              .getCanonicalType();
7425       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7426         return ATy->getSize().getSExtValue() != 1;
7427       // If we don't have a constant dimension length, we have to consider
7428       // the current section as having any size, so it is not necessarily
7429       // unitary. If it happen to be unity size, that's user fault.
7430       return true;
7431     }
7432 
7433     // Check if the length evaluates to 1.
7434     Expr::EvalResult Result;
7435     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7436       return true; // Can have more that size 1.
7437 
7438     llvm::APSInt ConstLength = Result.Val.getInt();
7439     return ConstLength.getSExtValue() != 1;
7440   }
7441 
7442   /// Generate the base pointers, section pointers, sizes, map type bits, and
7443   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7444   /// map type, map or motion modifiers, and expression components.
7445   /// \a IsFirstComponent should be set to true if the provided set of
7446   /// components is the first associated with a capture.
7447   void generateInfoForComponentList(
7448       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7449       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7450       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7451       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7452       bool IsFirstComponentList, bool IsImplicit,
7453       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7454       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7455       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7456           OverlappedElements = llvm::None) const {
7457     // The following summarizes what has to be generated for each map and the
7458     // types below. The generated information is expressed in this order:
7459     // base pointer, section pointer, size, flags
7460     // (to add to the ones that come from the map type and modifier).
7461     //
7462     // double d;
7463     // int i[100];
7464     // float *p;
7465     //
7466     // struct S1 {
7467     //   int i;
7468     //   float f[50];
7469     // }
7470     // struct S2 {
7471     //   int i;
7472     //   float f[50];
7473     //   S1 s;
7474     //   double *p;
7475     //   struct S2 *ps;
7476     //   int &ref;
7477     // }
7478     // S2 s;
7479     // S2 *ps;
7480     //
7481     // map(d)
7482     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7483     //
7484     // map(i)
7485     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7486     //
7487     // map(i[1:23])
7488     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7489     //
7490     // map(p)
7491     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7492     //
7493     // map(p[1:24])
7494     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7495     // in unified shared memory mode or for local pointers
7496     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7497     //
7498     // map(s)
7499     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7500     //
7501     // map(s.i)
7502     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7503     //
7504     // map(s.s.f)
7505     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7506     //
7507     // map(s.p)
7508     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7509     //
7510     // map(to: s.p[:22])
7511     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7512     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7513     // &(s.p), &(s.p[0]), 22*sizeof(double),
7514     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7515     // (*) alloc space for struct members, only this is a target parameter
7516     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7517     //      optimizes this entry out, same in the examples below)
7518     // (***) map the pointee (map: to)
7519     //
7520     // map(to: s.ref)
7521     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7522     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7523     // (*) alloc space for struct members, only this is a target parameter
7524     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7525     //      optimizes this entry out, same in the examples below)
7526     // (***) map the pointee (map: to)
7527     //
7528     // map(s.ps)
7529     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7530     //
7531     // map(from: s.ps->s.i)
7532     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7533     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7534     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7535     //
7536     // map(to: s.ps->ps)
7537     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7538     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7539     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7540     //
7541     // map(s.ps->ps->ps)
7542     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7543     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7544     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7545     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7546     //
7547     // map(to: s.ps->ps->s.f[:22])
7548     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7549     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7550     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7551     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7552     //
7553     // map(ps)
7554     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7555     //
7556     // map(ps->i)
7557     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7558     //
7559     // map(ps->s.f)
7560     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7561     //
7562     // map(from: ps->p)
7563     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7564     //
7565     // map(to: ps->p[:22])
7566     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7567     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7568     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7569     //
7570     // map(ps->ps)
7571     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7572     //
7573     // map(from: ps->ps->s.i)
7574     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7575     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7576     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7577     //
7578     // map(from: ps->ps->ps)
7579     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7580     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7581     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7582     //
7583     // map(ps->ps->ps->ps)
7584     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7585     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7586     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7587     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7588     //
7589     // map(to: ps->ps->ps->s.f[:22])
7590     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7591     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7592     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7593     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7594     //
7595     // map(to: s.f[:22]) map(from: s.p[:33])
7596     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7597     //     sizeof(double*) (**), TARGET_PARAM
7598     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7599     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7600     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7601     // (*) allocate contiguous space needed to fit all mapped members even if
7602     //     we allocate space for members not mapped (in this example,
7603     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7604     //     them as well because they fall between &s.f[0] and &s.p)
7605     //
7606     // map(from: s.f[:22]) map(to: ps->p[:33])
7607     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7608     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7609     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7610     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7611     // (*) the struct this entry pertains to is the 2nd element in the list of
7612     //     arguments, hence MEMBER_OF(2)
7613     //
7614     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7615     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7616     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7617     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7618     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7619     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7620     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7621     // (*) the struct this entry pertains to is the 4th element in the list
7622     //     of arguments, hence MEMBER_OF(4)
7623 
7624     // Track if the map information being generated is the first for a capture.
7625     bool IsCaptureFirstInfo = IsFirstComponentList;
7626     // When the variable is on a declare target link or in a to clause with
7627     // unified memory, a reference is needed to hold the host/device address
7628     // of the variable.
7629     bool RequiresReference = false;
7630 
7631     // Scan the components from the base to the complete expression.
7632     auto CI = Components.rbegin();
7633     auto CE = Components.rend();
7634     auto I = CI;
7635 
7636     // Track if the map information being generated is the first for a list of
7637     // components.
7638     bool IsExpressionFirstInfo = true;
7639     bool FirstPointerInComplexData = false;
7640     Address BP = Address::invalid();
7641     const Expr *AssocExpr = I->getAssociatedExpression();
7642     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7643     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7644     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7645 
7646     if (isa<MemberExpr>(AssocExpr)) {
7647       // The base is the 'this' pointer. The content of the pointer is going
7648       // to be the base of the field being mapped.
7649       BP = CGF.LoadCXXThisAddress();
7650     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7651                (OASE &&
7652                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7653       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7654     } else if (OAShE &&
7655                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7656       BP = Address(
7657           CGF.EmitScalarExpr(OAShE->getBase()),
7658           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7659     } else {
7660       // The base is the reference to the variable.
7661       // BP = &Var.
7662       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7663       if (const auto *VD =
7664               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7665         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7666                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7667           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7668               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7669                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7670             RequiresReference = true;
7671             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7672           }
7673         }
7674       }
7675 
7676       // If the variable is a pointer and is being dereferenced (i.e. is not
7677       // the last component), the base has to be the pointer itself, not its
7678       // reference. References are ignored for mapping purposes.
7679       QualType Ty =
7680           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7681       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7682         // No need to generate individual map information for the pointer, it
7683         // can be associated with the combined storage if shared memory mode is
7684         // active or the base declaration is not global variable.
7685         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7686         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7687             !VD || VD->hasLocalStorage())
7688           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7689         else
7690           FirstPointerInComplexData = true;
7691         ++I;
7692       }
7693     }
7694 
7695     // Track whether a component of the list should be marked as MEMBER_OF some
7696     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7697     // in a component list should be marked as MEMBER_OF, all subsequent entries
7698     // do not belong to the base struct. E.g.
7699     // struct S2 s;
7700     // s.ps->ps->ps->f[:]
7701     //   (1) (2) (3) (4)
7702     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7703     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7704     // is the pointee of ps(2) which is not member of struct s, so it should not
7705     // be marked as such (it is still PTR_AND_OBJ).
7706     // The variable is initialized to false so that PTR_AND_OBJ entries which
7707     // are not struct members are not considered (e.g. array of pointers to
7708     // data).
7709     bool ShouldBeMemberOf = false;
7710 
7711     // Variable keeping track of whether or not we have encountered a component
7712     // in the component list which is a member expression. Useful when we have a
7713     // pointer or a final array section, in which case it is the previous
7714     // component in the list which tells us whether we have a member expression.
7715     // E.g. X.f[:]
7716     // While processing the final array section "[:]" it is "f" which tells us
7717     // whether we are dealing with a member of a declared struct.
7718     const MemberExpr *EncounteredME = nullptr;
7719 
7720     // Track for the total number of dimension. Start from one for the dummy
7721     // dimension.
7722     uint64_t DimSize = 1;
7723 
7724     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7725     bool IsPrevMemberReference = false;
7726 
7727     for (; I != CE; ++I) {
7728       // If the current component is member of a struct (parent struct) mark it.
7729       if (!EncounteredME) {
7730         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7731         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7732         // as MEMBER_OF the parent struct.
7733         if (EncounteredME) {
7734           ShouldBeMemberOf = true;
7735           // Do not emit as complex pointer if this is actually not array-like
7736           // expression.
7737           if (FirstPointerInComplexData) {
7738             QualType Ty = std::prev(I)
7739                               ->getAssociatedDeclaration()
7740                               ->getType()
7741                               .getNonReferenceType();
7742             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7743             FirstPointerInComplexData = false;
7744           }
7745         }
7746       }
7747 
7748       auto Next = std::next(I);
7749 
7750       // We need to generate the addresses and sizes if this is the last
7751       // component, if the component is a pointer or if it is an array section
7752       // whose length can't be proved to be one. If this is a pointer, it
7753       // becomes the base address for the following components.
7754 
7755       // A final array section, is one whose length can't be proved to be one.
7756       // If the map item is non-contiguous then we don't treat any array section
7757       // as final array section.
7758       bool IsFinalArraySection =
7759           !IsNonContiguous &&
7760           isFinalArraySectionExpression(I->getAssociatedExpression());
7761 
7762       // If we have a declaration for the mapping use that, otherwise use
7763       // the base declaration of the map clause.
7764       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7765                                      ? I->getAssociatedDeclaration()
7766                                      : BaseDecl;
7767       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7768                                                : MapExpr;
7769 
7770       // Get information on whether the element is a pointer. Have to do a
7771       // special treatment for array sections given that they are built-in
7772       // types.
7773       const auto *OASE =
7774           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7775       const auto *OAShE =
7776           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7777       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7778       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7779       bool IsPointer =
7780           OAShE ||
7781           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7782                        .getCanonicalType()
7783                        ->isAnyPointerType()) ||
7784           I->getAssociatedExpression()->getType()->isAnyPointerType();
7785       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7786                                MapDecl &&
7787                                MapDecl->getType()->isLValueReferenceType();
7788       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7789 
7790       if (OASE)
7791         ++DimSize;
7792 
7793       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7794           IsFinalArraySection) {
7795         // If this is not the last component, we expect the pointer to be
7796         // associated with an array expression or member expression.
7797         assert((Next == CE ||
7798                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7799                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7800                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7801                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7802                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7803                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7804                "Unexpected expression");
7805 
7806         Address LB = Address::invalid();
7807         Address LowestElem = Address::invalid();
7808         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7809                                        const MemberExpr *E) {
7810           const Expr *BaseExpr = E->getBase();
7811           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7812           // scalar.
7813           LValue BaseLV;
7814           if (E->isArrow()) {
7815             LValueBaseInfo BaseInfo;
7816             TBAAAccessInfo TBAAInfo;
7817             Address Addr =
7818                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7819             QualType PtrTy = BaseExpr->getType()->getPointeeType();
7820             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7821           } else {
7822             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7823           }
7824           return BaseLV;
7825         };
7826         if (OAShE) {
7827           LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7828                                     CGF.getContext().getTypeAlignInChars(
7829                                         OAShE->getBase()->getType()));
7830         } else if (IsMemberReference) {
7831           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7832           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7833           LowestElem = CGF.EmitLValueForFieldInitialization(
7834                               BaseLVal, cast<FieldDecl>(MapDecl))
7835                            .getAddress(CGF);
7836           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7837                    .getAddress(CGF);
7838         } else {
7839           LowestElem = LB =
7840               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7841                   .getAddress(CGF);
7842         }
7843 
7844         // If this component is a pointer inside the base struct then we don't
7845         // need to create any entry for it - it will be combined with the object
7846         // it is pointing to into a single PTR_AND_OBJ entry.
7847         bool IsMemberPointerOrAddr =
7848             EncounteredME &&
7849             (((IsPointer || ForDeviceAddr) &&
7850               I->getAssociatedExpression() == EncounteredME) ||
7851              (IsPrevMemberReference && !IsPointer) ||
7852              (IsMemberReference && Next != CE &&
7853               !Next->getAssociatedExpression()->getType()->isPointerType()));
7854         if (!OverlappedElements.empty() && Next == CE) {
7855           // Handle base element with the info for overlapped elements.
7856           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7857           assert(!IsPointer &&
7858                  "Unexpected base element with the pointer type.");
7859           // Mark the whole struct as the struct that requires allocation on the
7860           // device.
7861           PartialStruct.LowestElem = {0, LowestElem};
7862           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7863               I->getAssociatedExpression()->getType());
7864           Address HB = CGF.Builder.CreateConstGEP(
7865               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
7866                                                               CGF.VoidPtrTy),
7867               TypeSize.getQuantity() - 1);
7868           PartialStruct.HighestElem = {
7869               std::numeric_limits<decltype(
7870                   PartialStruct.HighestElem.first)>::max(),
7871               HB};
7872           PartialStruct.Base = BP;
7873           PartialStruct.LB = LB;
7874           assert(
7875               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7876               "Overlapped elements must be used only once for the variable.");
7877           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7878           // Emit data for non-overlapped data.
7879           OpenMPOffloadMappingFlags Flags =
7880               OMP_MAP_MEMBER_OF |
7881               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7882                              /*AddPtrFlag=*/false,
7883                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7884           llvm::Value *Size = nullptr;
7885           // Do bitcopy of all non-overlapped structure elements.
7886           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7887                    Component : OverlappedElements) {
7888             Address ComponentLB = Address::invalid();
7889             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7890                  Component) {
7891               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7892                 const auto *FD = dyn_cast<FieldDecl>(VD);
7893                 if (FD && FD->getType()->isLValueReferenceType()) {
7894                   const auto *ME =
7895                       cast<MemberExpr>(MC.getAssociatedExpression());
7896                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7897                   ComponentLB =
7898                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7899                           .getAddress(CGF);
7900                 } else {
7901                   ComponentLB =
7902                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7903                           .getAddress(CGF);
7904                 }
7905                 Size = CGF.Builder.CreatePtrDiff(
7906                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7907                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7908                 break;
7909               }
7910             }
7911             assert(Size && "Failed to determine structure size");
7912             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7913             CombinedInfo.BasePointers.push_back(BP.getPointer());
7914             CombinedInfo.Pointers.push_back(LB.getPointer());
7915             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7916                 Size, CGF.Int64Ty, /*isSigned=*/true));
7917             CombinedInfo.Types.push_back(Flags);
7918             CombinedInfo.Mappers.push_back(nullptr);
7919             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7920                                                                       : 1);
7921             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7922           }
7923           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7924           CombinedInfo.BasePointers.push_back(BP.getPointer());
7925           CombinedInfo.Pointers.push_back(LB.getPointer());
7926           Size = CGF.Builder.CreatePtrDiff(
7927               CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
7928               CGF.EmitCastToVoidPtr(LB.getPointer()));
7929           CombinedInfo.Sizes.push_back(
7930               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7931           CombinedInfo.Types.push_back(Flags);
7932           CombinedInfo.Mappers.push_back(nullptr);
7933           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7934                                                                     : 1);
7935           break;
7936         }
7937         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7938         if (!IsMemberPointerOrAddr ||
7939             (Next == CE && MapType != OMPC_MAP_unknown)) {
7940           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7941           CombinedInfo.BasePointers.push_back(BP.getPointer());
7942           CombinedInfo.Pointers.push_back(LB.getPointer());
7943           CombinedInfo.Sizes.push_back(
7944               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7945           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7946                                                                     : 1);
7947 
7948           // If Mapper is valid, the last component inherits the mapper.
7949           bool HasMapper = Mapper && Next == CE;
7950           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7951 
7952           // We need to add a pointer flag for each map that comes from the
7953           // same expression except for the first one. We also need to signal
7954           // this map is the first one that relates with the current capture
7955           // (there is a set of entries for each capture).
7956           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7957               MapType, MapModifiers, MotionModifiers, IsImplicit,
7958               !IsExpressionFirstInfo || RequiresReference ||
7959                   FirstPointerInComplexData || IsMemberReference,
7960               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7961 
7962           if (!IsExpressionFirstInfo || IsMemberReference) {
7963             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7964             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7965             if (IsPointer || (IsMemberReference && Next != CE))
7966               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7967                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7968 
7969             if (ShouldBeMemberOf) {
7970               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7971               // should be later updated with the correct value of MEMBER_OF.
7972               Flags |= OMP_MAP_MEMBER_OF;
7973               // From now on, all subsequent PTR_AND_OBJ entries should not be
7974               // marked as MEMBER_OF.
7975               ShouldBeMemberOf = false;
7976             }
7977           }
7978 
7979           CombinedInfo.Types.push_back(Flags);
7980         }
7981 
7982         // If we have encountered a member expression so far, keep track of the
7983         // mapped member. If the parent is "*this", then the value declaration
7984         // is nullptr.
7985         if (EncounteredME) {
7986           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7987           unsigned FieldIndex = FD->getFieldIndex();
7988 
7989           // Update info about the lowest and highest elements for this struct
7990           if (!PartialStruct.Base.isValid()) {
7991             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7992             if (IsFinalArraySection) {
7993               Address HB =
7994                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7995                       .getAddress(CGF);
7996               PartialStruct.HighestElem = {FieldIndex, HB};
7997             } else {
7998               PartialStruct.HighestElem = {FieldIndex, LowestElem};
7999             }
8000             PartialStruct.Base = BP;
8001             PartialStruct.LB = BP;
8002           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8003             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8004           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8005             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8006           }
8007         }
8008 
8009         // Need to emit combined struct for array sections.
8010         if (IsFinalArraySection || IsNonContiguous)
8011           PartialStruct.IsArraySection = true;
8012 
8013         // If we have a final array section, we are done with this expression.
8014         if (IsFinalArraySection)
8015           break;
8016 
8017         // The pointer becomes the base for the next element.
8018         if (Next != CE)
8019           BP = IsMemberReference ? LowestElem : LB;
8020 
8021         IsExpressionFirstInfo = false;
8022         IsCaptureFirstInfo = false;
8023         FirstPointerInComplexData = false;
8024         IsPrevMemberReference = IsMemberReference;
8025       } else if (FirstPointerInComplexData) {
8026         QualType Ty = Components.rbegin()
8027                           ->getAssociatedDeclaration()
8028                           ->getType()
8029                           .getNonReferenceType();
8030         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8031         FirstPointerInComplexData = false;
8032       }
8033     }
8034     // If ran into the whole component - allocate the space for the whole
8035     // record.
8036     if (!EncounteredME)
8037       PartialStruct.HasCompleteRecord = true;
8038 
8039     if (!IsNonContiguous)
8040       return;
8041 
8042     const ASTContext &Context = CGF.getContext();
8043 
8044     // For supporting stride in array section, we need to initialize the first
8045     // dimension size as 1, first offset as 0, and first count as 1
8046     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8047     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8048     MapValuesArrayTy CurStrides;
8049     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8050     uint64_t ElementTypeSize;
8051 
8052     // Collect Size information for each dimension and get the element size as
8053     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8054     // should be [10, 10] and the first stride is 4 btyes.
8055     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8056          Components) {
8057       const Expr *AssocExpr = Component.getAssociatedExpression();
8058       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8059 
8060       if (!OASE)
8061         continue;
8062 
8063       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8064       auto *CAT = Context.getAsConstantArrayType(Ty);
8065       auto *VAT = Context.getAsVariableArrayType(Ty);
8066 
8067       // We need all the dimension size except for the last dimension.
8068       assert((VAT || CAT || &Component == &*Components.begin()) &&
8069              "Should be either ConstantArray or VariableArray if not the "
8070              "first Component");
8071 
8072       // Get element size if CurStrides is empty.
8073       if (CurStrides.empty()) {
8074         const Type *ElementType = nullptr;
8075         if (CAT)
8076           ElementType = CAT->getElementType().getTypePtr();
8077         else if (VAT)
8078           ElementType = VAT->getElementType().getTypePtr();
8079         else
8080           assert(&Component == &*Components.begin() &&
8081                  "Only expect pointer (non CAT or VAT) when this is the "
8082                  "first Component");
8083         // If ElementType is null, then it means the base is a pointer
8084         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8085         // for next iteration.
8086         if (ElementType) {
8087           // For the case that having pointer as base, we need to remove one
8088           // level of indirection.
8089           if (&Component != &*Components.begin())
8090             ElementType = ElementType->getPointeeOrArrayElementType();
8091           ElementTypeSize =
8092               Context.getTypeSizeInChars(ElementType).getQuantity();
8093           CurStrides.push_back(
8094               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8095         }
8096       }
8097       // Get dimension value except for the last dimension since we don't need
8098       // it.
8099       if (DimSizes.size() < Components.size() - 1) {
8100         if (CAT)
8101           DimSizes.push_back(llvm::ConstantInt::get(
8102               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8103         else if (VAT)
8104           DimSizes.push_back(CGF.Builder.CreateIntCast(
8105               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8106               /*IsSigned=*/false));
8107       }
8108     }
8109 
8110     // Skip the dummy dimension since we have already have its information.
8111     auto DI = DimSizes.begin() + 1;
8112     // Product of dimension.
8113     llvm::Value *DimProd =
8114         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8115 
8116     // Collect info for non-contiguous. Notice that offset, count, and stride
8117     // are only meaningful for array-section, so we insert a null for anything
8118     // other than array-section.
8119     // Also, the size of offset, count, and stride are not the same as
8120     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8121     // count, and stride are the same as the number of non-contiguous
8122     // declaration in target update to/from clause.
8123     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8124          Components) {
8125       const Expr *AssocExpr = Component.getAssociatedExpression();
8126 
8127       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8128         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8129             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8130             /*isSigned=*/false);
8131         CurOffsets.push_back(Offset);
8132         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8133         CurStrides.push_back(CurStrides.back());
8134         continue;
8135       }
8136 
8137       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8138 
8139       if (!OASE)
8140         continue;
8141 
8142       // Offset
8143       const Expr *OffsetExpr = OASE->getLowerBound();
8144       llvm::Value *Offset = nullptr;
8145       if (!OffsetExpr) {
8146         // If offset is absent, then we just set it to zero.
8147         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8148       } else {
8149         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8150                                            CGF.Int64Ty,
8151                                            /*isSigned=*/false);
8152       }
8153       CurOffsets.push_back(Offset);
8154 
8155       // Count
8156       const Expr *CountExpr = OASE->getLength();
8157       llvm::Value *Count = nullptr;
8158       if (!CountExpr) {
8159         // In Clang, once a high dimension is an array section, we construct all
8160         // the lower dimension as array section, however, for case like
8161         // arr[0:2][2], Clang construct the inner dimension as an array section
8162         // but it actually is not in an array section form according to spec.
8163         if (!OASE->getColonLocFirst().isValid() &&
8164             !OASE->getColonLocSecond().isValid()) {
8165           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8166         } else {
8167           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8168           // When the length is absent it defaults to ⌈(size −
8169           // lower-bound)/stride⌉, where size is the size of the array
8170           // dimension.
8171           const Expr *StrideExpr = OASE->getStride();
8172           llvm::Value *Stride =
8173               StrideExpr
8174                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8175                                               CGF.Int64Ty, /*isSigned=*/false)
8176                   : nullptr;
8177           if (Stride)
8178             Count = CGF.Builder.CreateUDiv(
8179                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8180           else
8181             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8182         }
8183       } else {
8184         Count = CGF.EmitScalarExpr(CountExpr);
8185       }
8186       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8187       CurCounts.push_back(Count);
8188 
8189       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8190       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8191       //              Offset      Count     Stride
8192       //    D0          0           1         4    (int)    <- dummy dimension
8193       //    D1          0           2         8    (2 * (1) * 4)
8194       //    D2          1           2         20   (1 * (1 * 5) * 4)
8195       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8196       const Expr *StrideExpr = OASE->getStride();
8197       llvm::Value *Stride =
8198           StrideExpr
8199               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8200                                           CGF.Int64Ty, /*isSigned=*/false)
8201               : nullptr;
8202       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8203       if (Stride)
8204         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8205       else
8206         CurStrides.push_back(DimProd);
8207       if (DI != DimSizes.end())
8208         ++DI;
8209     }
8210 
8211     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8212     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8213     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8214   }
8215 
8216   /// Return the adjusted map modifiers if the declaration a capture refers to
8217   /// appears in a first-private clause. This is expected to be used only with
8218   /// directives that start with 'target'.
8219   MappableExprsHandler::OpenMPOffloadMappingFlags
8220   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8221     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8222 
8223     // A first private variable captured by reference will use only the
8224     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8225     // declaration is known as first-private in this handler.
8226     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8227       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8228         return MappableExprsHandler::OMP_MAP_TO |
8229                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8230       return MappableExprsHandler::OMP_MAP_PRIVATE |
8231              MappableExprsHandler::OMP_MAP_TO;
8232     }
8233     return MappableExprsHandler::OMP_MAP_TO |
8234            MappableExprsHandler::OMP_MAP_FROM;
8235   }
8236 
8237   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8238     // Rotate by getFlagMemberOffset() bits.
8239     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8240                                                   << getFlagMemberOffset());
8241   }
8242 
8243   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8244                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8245     // If the entry is PTR_AND_OBJ but has not been marked with the special
8246     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8247     // marked as MEMBER_OF.
8248     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8249         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8250       return;
8251 
8252     // Reset the placeholder value to prepare the flag for the assignment of the
8253     // proper MEMBER_OF value.
8254     Flags &= ~OMP_MAP_MEMBER_OF;
8255     Flags |= MemberOfFlag;
8256   }
8257 
  /// Append to \p Layout, in LLVM field order, the non-bitfield, non-zero-size
  /// fields of \p RD, with non-empty bases expanded recursively so the result
  /// is a flat list of FieldDecls. \p AsBase selects the base-subobject LLVM
  /// type of \p RD instead of its complete-object type.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    // One slot per LLVM struct element; a slot holds either a base class or a
    // field, and stays null for elements with no mapped decl (e.g. padding or
    // bitfield storage).
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // Skip if the slot was already claimed above; non-virtual entries take
      // precedence.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit the collected entries in LLVM field order, recursing into each
    // base (as a base subobject) so only FieldDecls reach the output.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
8317 
8318   /// Generate all the base pointers, section pointers, sizes, map types, and
8319   /// mappers for the extracted mappable expressions (all included in \a
8320   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8321   /// pair of the relevant declaration and index where it occurs is appended to
8322   /// the device pointers info array.
8323   void generateAllInfoForClauses(
8324       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8325       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8326           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8327     // We have to process the component lists that relate with the same
8328     // declaration in a single chunk so that we can generate the map flags
8329     // correctly. Therefore, we organize all lists in a map.
8330     enum MapKind { Present, Allocs, Other, Total };
8331     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8332                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8333         Info;
8334 
8335     // Helper function to fill the information map for the different supported
8336     // clauses.
8337     auto &&InfoGen =
8338         [&Info, &SkipVarSet](
8339             const ValueDecl *D, MapKind Kind,
8340             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8341             OpenMPMapClauseKind MapType,
8342             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8343             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8344             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8345             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8346           if (SkipVarSet.contains(D))
8347             return;
8348           auto It = Info.find(D);
8349           if (It == Info.end())
8350             It = Info
8351                      .insert(std::make_pair(
8352                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8353                      .first;
8354           It->second[Kind].emplace_back(
8355               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8356               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8357         };
8358 
8359     for (const auto *Cl : Clauses) {
8360       const auto *C = dyn_cast<OMPMapClause>(Cl);
8361       if (!C)
8362         continue;
8363       MapKind Kind = Other;
8364       if (!C->getMapTypeModifiers().empty() &&
8365           llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
8366             return K == OMPC_MAP_MODIFIER_present;
8367           }))
8368         Kind = Present;
8369       else if (C->getMapType() == OMPC_MAP_alloc)
8370         Kind = Allocs;
8371       const auto *EI = C->getVarRefs().begin();
8372       for (const auto L : C->component_lists()) {
8373         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8374         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8375                 C->getMapTypeModifiers(), llvm::None,
8376                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8377                 E);
8378         ++EI;
8379       }
8380     }
8381     for (const auto *Cl : Clauses) {
8382       const auto *C = dyn_cast<OMPToClause>(Cl);
8383       if (!C)
8384         continue;
8385       MapKind Kind = Other;
8386       if (!C->getMotionModifiers().empty() &&
8387           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8388             return K == OMPC_MOTION_MODIFIER_present;
8389           }))
8390         Kind = Present;
8391       const auto *EI = C->getVarRefs().begin();
8392       for (const auto L : C->component_lists()) {
8393         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8394                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8395                 C->isImplicit(), std::get<2>(L), *EI);
8396         ++EI;
8397       }
8398     }
8399     for (const auto *Cl : Clauses) {
8400       const auto *C = dyn_cast<OMPFromClause>(Cl);
8401       if (!C)
8402         continue;
8403       MapKind Kind = Other;
8404       if (!C->getMotionModifiers().empty() &&
8405           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8406             return K == OMPC_MOTION_MODIFIER_present;
8407           }))
8408         Kind = Present;
8409       const auto *EI = C->getVarRefs().begin();
8410       for (const auto L : C->component_lists()) {
8411         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8412                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8413                 C->isImplicit(), std::get<2>(L), *EI);
8414         ++EI;
8415       }
8416     }
8417 
8418     // Look at the use_device_ptr clause information and mark the existing map
8419     // entries as such. If there is no map information for an entry in the
8420     // use_device_ptr list, we create one with map type 'alloc' and zero size
8421     // section. It is the user fault if that was not mapped before. If there is
8422     // no map information and the pointer is a struct member, then we defer the
8423     // emission of that entry until the whole struct has been processed.
8424     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8425                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8426         DeferredInfo;
8427     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8428 
8429     for (const auto *Cl : Clauses) {
8430       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8431       if (!C)
8432         continue;
8433       for (const auto L : C->component_lists()) {
8434         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8435             std::get<1>(L);
8436         assert(!Components.empty() &&
8437                "Not expecting empty list of components!");
8438         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8439         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8440         const Expr *IE = Components.back().getAssociatedExpression();
8441         // If the first component is a member expression, we have to look into
8442         // 'this', which maps to null in the map of map information. Otherwise
8443         // look directly for the information.
8444         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8445 
8446         // We potentially have map information for this declaration already.
8447         // Look for the first set of components that refer to it.
8448         if (It != Info.end()) {
8449           bool Found = false;
8450           for (auto &Data : It->second) {
8451             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8452               return MI.Components.back().getAssociatedDeclaration() == VD;
8453             });
8454             // If we found a map entry, signal that the pointer has to be
8455             // returned and move on to the next declaration. Exclude cases where
8456             // the base pointer is mapped as array subscript, array section or
8457             // array shaping. The base address is passed as a pointer to base in
8458             // this case and cannot be used as a base for use_device_ptr list
8459             // item.
8460             if (CI != Data.end()) {
8461               auto PrevCI = std::next(CI->Components.rbegin());
8462               const auto *VarD = dyn_cast<VarDecl>(VD);
8463               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8464                   isa<MemberExpr>(IE) ||
8465                   !VD->getType().getNonReferenceType()->isPointerType() ||
8466                   PrevCI == CI->Components.rend() ||
8467                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8468                   VarD->hasLocalStorage()) {
8469                 CI->ReturnDevicePointer = true;
8470                 Found = true;
8471                 break;
8472               }
8473             }
8474           }
8475           if (Found)
8476             continue;
8477         }
8478 
8479         // We didn't find any match in our map information - generate a zero
8480         // size array section - if the pointer is a struct member we defer this
8481         // action until the whole struct has been processed.
8482         if (isa<MemberExpr>(IE)) {
8483           // Insert the pointer into Info to be processed by
8484           // generateInfoForComponentList. Because it is a member pointer
8485           // without a pointee, no entry will be generated for it, therefore
8486           // we need to generate one after the whole struct has been processed.
8487           // Nonetheless, generateInfoForComponentList must be called to take
8488           // the pointer into account for the calculation of the range of the
8489           // partial struct.
8490           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8491                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8492                   nullptr);
8493           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8494         } else {
8495           llvm::Value *Ptr =
8496               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8497           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8498           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8499           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8500           UseDevicePtrCombinedInfo.Sizes.push_back(
8501               llvm::Constant::getNullValue(CGF.Int64Ty));
8502           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8503           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8504         }
8505       }
8506     }
8507 
8508     // Look at the use_device_addr clause information and mark the existing map
8509     // entries as such. If there is no map information for an entry in the
8510     // use_device_addr list, we create one with map type 'alloc' and zero size
8511     // section. It is the user fault if that was not mapped before. If there is
8512     // no map information and the pointer is a struct member, then we defer the
8513     // emission of that entry until the whole struct has been processed.
8514     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8515     for (const auto *Cl : Clauses) {
8516       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8517       if (!C)
8518         continue;
8519       for (const auto L : C->component_lists()) {
8520         assert(!std::get<1>(L).empty() &&
8521                "Not expecting empty list of components!");
8522         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8523         if (!Processed.insert(VD).second)
8524           continue;
8525         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8526         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8527         // If the first component is a member expression, we have to look into
8528         // 'this', which maps to null in the map of map information. Otherwise
8529         // look directly for the information.
8530         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8531 
8532         // We potentially have map information for this declaration already.
8533         // Look for the first set of components that refer to it.
8534         if (It != Info.end()) {
8535           bool Found = false;
8536           for (auto &Data : It->second) {
8537             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8538               return MI.Components.back().getAssociatedDeclaration() == VD;
8539             });
8540             // If we found a map entry, signal that the pointer has to be
8541             // returned and move on to the next declaration.
8542             if (CI != Data.end()) {
8543               CI->ReturnDevicePointer = true;
8544               Found = true;
8545               break;
8546             }
8547           }
8548           if (Found)
8549             continue;
8550         }
8551 
8552         // We didn't find any match in our map information - generate a zero
8553         // size array section - if the pointer is a struct member we defer this
8554         // action until the whole struct has been processed.
8555         if (isa<MemberExpr>(IE)) {
8556           // Insert the pointer into Info to be processed by
8557           // generateInfoForComponentList. Because it is a member pointer
8558           // without a pointee, no entry will be generated for it, therefore
8559           // we need to generate one after the whole struct has been processed.
8560           // Nonetheless, generateInfoForComponentList must be called to take
8561           // the pointer into account for the calculation of the range of the
8562           // partial struct.
8563           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8564                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8565                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8566           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8567         } else {
8568           llvm::Value *Ptr;
8569           if (IE->isGLValue())
8570             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8571           else
8572             Ptr = CGF.EmitScalarExpr(IE);
8573           CombinedInfo.Exprs.push_back(VD);
8574           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8575           CombinedInfo.Pointers.push_back(Ptr);
8576           CombinedInfo.Sizes.push_back(
8577               llvm::Constant::getNullValue(CGF.Int64Ty));
8578           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8579           CombinedInfo.Mappers.push_back(nullptr);
8580         }
8581       }
8582     }
8583 
8584     for (const auto &Data : Info) {
8585       StructRangeInfoTy PartialStruct;
8586       // Temporary generated information.
8587       MapCombinedInfoTy CurInfo;
8588       const Decl *D = Data.first;
8589       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8590       for (const auto &M : Data.second) {
8591         for (const MapInfo &L : M) {
8592           assert(!L.Components.empty() &&
8593                  "Not expecting declaration with no component lists.");
8594 
8595           // Remember the current base pointer index.
8596           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8597           CurInfo.NonContigInfo.IsNonContiguous =
8598               L.Components.back().isNonContiguous();
8599           generateInfoForComponentList(
8600               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8601               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8602               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8603 
8604           // If this entry relates with a device pointer, set the relevant
8605           // declaration and add the 'return pointer' flag.
8606           if (L.ReturnDevicePointer) {
8607             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8608                    "Unexpected number of mapped base pointers.");
8609 
8610             const ValueDecl *RelevantVD =
8611                 L.Components.back().getAssociatedDeclaration();
8612             assert(RelevantVD &&
8613                    "No relevant declaration related with device pointer??");
8614 
8615             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8616                 RelevantVD);
8617             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8618           }
8619         }
8620       }
8621 
8622       // Append any pending zero-length pointers which are struct members and
8623       // used with use_device_ptr or use_device_addr.
8624       auto CI = DeferredInfo.find(Data.first);
8625       if (CI != DeferredInfo.end()) {
8626         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8627           llvm::Value *BasePtr;
8628           llvm::Value *Ptr;
8629           if (L.ForDeviceAddr) {
8630             if (L.IE->isGLValue())
8631               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8632             else
8633               Ptr = this->CGF.EmitScalarExpr(L.IE);
8634             BasePtr = Ptr;
8635             // Entry is RETURN_PARAM. Also, set the placeholder value
8636             // MEMBER_OF=FFFF so that the entry is later updated with the
8637             // correct value of MEMBER_OF.
8638             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8639           } else {
8640             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8641             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8642                                              L.IE->getExprLoc());
8643             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8644             // placeholder value MEMBER_OF=FFFF so that the entry is later
8645             // updated with the correct value of MEMBER_OF.
8646             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8647                                     OMP_MAP_MEMBER_OF);
8648           }
8649           CurInfo.Exprs.push_back(L.VD);
8650           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8651           CurInfo.Pointers.push_back(Ptr);
8652           CurInfo.Sizes.push_back(
8653               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8654           CurInfo.Mappers.push_back(nullptr);
8655         }
8656       }
8657       // If there is an entry in PartialStruct it means we have a struct with
8658       // individual members mapped. Emit an extra combined entry.
8659       if (PartialStruct.Base.isValid()) {
8660         CurInfo.NonContigInfo.Dims.push_back(0);
8661         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8662       }
8663 
8664       // We need to append the results of this capture to what we already
8665       // have.
8666       CombinedInfo.append(CurInfo);
8667     }
8668     // Append data for use_device_ptr clauses.
8669     CombinedInfo.append(UseDevicePtrCombinedInfo);
8670   }
8671 
8672 public:
  /// Constructor for an executable directive. Pre-populates the lookup tables
  /// that later map-info generation consults: firstprivate declarations
  /// (explicit and implicit) and declarations named in is_device_ptr clauses.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    // try_emplace keeps the first entry seen for a declaration, so an earlier
    // clause's implicitness wins if a variable is listed more than once.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        // Prefer the allocator-traits variable when present; otherwise, if
        // the allocator expression itself names a variable, treat that
        // variable as an implicit firstprivate.
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information: record every component list
    // attached to each declaration in an is_device_ptr clause.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
  }
8698 
  /// Constructor for the declare mapper directive. Unlike the executable
  /// directive constructor, no clause pre-scan is needed; only the current
  /// directive is recorded for later queries.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8702 
8703   /// Generate code for the combined entry if we have a partially mapped struct
8704   /// and take care of the mapping flags of the arguments corresponding to
8705   /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is not a struct member (no MEMBER_OF placeholder)
    // and not an array section needs no combined entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // When the whole record is mapped, both bounds collapse onto the record
    // base; the +1 GEP below then yields the end of the complete record.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // The combined entry becomes the TARGET_PARAM only when generating info
    // for captures (NotTargetParams == false); otherwise it carries no flags.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element: the combined entry
    // pushed above takes over that role.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
8759 
8760   /// Generate all the base pointers, section pointers, sizes, map types, and
8761   /// mappers for the extracted mappable expressions (all included in \a
8762   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8763   /// pair of the relevant declaration and index where it occurs is appended to
8764   /// the device pointers info array.
8765   void generateAllInfo(
8766       MapCombinedInfoTy &CombinedInfo,
8767       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8768           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8769     assert(CurDir.is<const OMPExecutableDirective *>() &&
8770            "Expect a executable directive");
8771     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8772     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8773   }
8774 
8775   /// Generate all the base pointers, section pointers, sizes, map types, and
8776   /// mappers for the extracted map clauses of user-defined mapper (all included
8777   /// in \a CombinedInfo).
8778   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8779     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8780            "Expect a declare mapper directive");
8781     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8782     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8783   }
8784 
8785   /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only applies if the captured declaration is a lambda object.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    // Collect the lambda's capture fields; ThisCapture is non-null iff the
    // lambda captured 'this'.
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Emit a PTR_AND_OBJ entry for the captured 'this'. Record the
      // field-address -> lambda-address association so the MEMBER_OF index
      // can be patched later (see adjustMemberOfForLambdaCaptures).
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // Deliberately shadows the outer VD: from here on we work with the
      // captured variable itself.
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need an entry.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: entry uses the size of the referenced type.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer: entry uses the loaded pointer value, zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
8851 
8852   /// Set correct indices for lambdas captures.
8853   void adjustMemberOfForLambdaCaptures(
8854       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8855       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8856       MapFlagsArrayTy &Types) const {
8857     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8858       // Set correct member_of idx for all implicit lambda captures.
8859       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8860                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8861         continue;
8862       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8863       assert(BasePtr && "Unable to find base lambda address.");
8864       int TgtIdx = -1;
8865       for (unsigned J = I; J > 0; --J) {
8866         unsigned Idx = J - 1;
8867         if (Pointers[Idx] != BasePtr)
8868           continue;
8869         TgtIdx = Idx;
8870         break;
8871       }
8872       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8873       // All other current entries will be MEMBER_OF the combined entry
8874       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8875       // 0xFFFF in the MEMBER_OF field).
8876       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8877       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8878     }
8879   }
8880 
8881   /// Generate the base pointers, section pointers, sizes, map types, and
8882   /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component. A 'this' capture has no associated declaration.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // Collect every component list from map clauses that references this
    // declaration, along with map type, modifiers, implicitness, mapper, and
    // the clause expression (used for names/diagnostics).
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Process lists with the 'present' modifier or the 'alloc' map type first
    // so these properties take precedence when lists overlap.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent = !MapModifiers.empty() &&
                        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                          return K == clang::OMPC_MAP_MODIFIER_present;
                        });
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          !MapModifiers.empty() &&
          llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
            return K == clang::OMPC_MAP_MODIFIER_present;
          });
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      // Compare against every later list; walk both component lists from the
      // base outwards (reverse iterators) until they diverge.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              std::prev(It)
                  ->getAssociatedExpression()
                  ->getType()
                  ->isPointerType())
            continue;
          // The shorter (fully matched) list is the base; the longer one is
          // recorded as overlapping it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointer/array layers to reach the underlying record type; its
      // field layout serves as the ordering key below.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    // Order each base's overlapped component lists by field declaration
    // position so they are processed in layout order.
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // Fields of different (base) records: whichever appears first in
            // the flattened layout is ordered first.
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }
9112 
9113   /// Generate the default map information for a given capture \a CI,
9114   /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this': map with the size of the pointee record, tofrom by default.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // Firstprivate captures inherit implicitness from their clause.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer captured by reference: load the pointer value
        // through the reference and use it as the section pointer.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9185 };
9186 } // anonymous namespace
9187 
9188 static void emitNonContiguousDescriptor(
9189     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9190     CGOpenMPRuntime::TargetDataInfo &Info) {
9191   CodeGenModule &CGM = CGF.CGM;
9192   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9193       &NonContigInfo = CombinedInfo.NonContigInfo;
9194 
9195   // Build an array of struct descriptor_dim and then assign it to
9196   // offload_args.
9197   //
9198   // struct descriptor_dim {
9199   //  uint64_t offset;
9200   //  uint64_t count;
9201   //  uint64_t stride
9202   // };
9203   ASTContext &C = CGF.getContext();
9204   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9205   RecordDecl *RD;
9206   RD = C.buildImplicitRecord("descriptor_dim");
9207   RD->startDefinition();
9208   addFieldToRecordDecl(C, RD, Int64Ty);
9209   addFieldToRecordDecl(C, RD, Int64Ty);
9210   addFieldToRecordDecl(C, RD, Int64Ty);
9211   RD->completeDefinition();
9212   QualType DimTy = C.getRecordType(RD);
9213 
9214   enum { OffsetFD = 0, CountFD, StrideFD };
9215   // We need two index variable here since the size of "Dims" is the same as the
9216   // size of Components, however, the size of offset, count, and stride is equal
9217   // to the size of base declaration that is non-contiguous.
9218   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9219     // Skip emitting ir if dimension size is 1 since it cannot be
9220     // non-contiguous.
9221     if (NonContigInfo.Dims[I] == 1)
9222       continue;
9223     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9224     QualType ArrayTy =
9225         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9226     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9227     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9228       unsigned RevIdx = EE - II - 1;
9229       LValue DimsLVal = CGF.MakeAddrLValue(
9230           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9231       // Offset
9232       LValue OffsetLVal = CGF.EmitLValueForField(
9233           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9234       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9235       // Count
9236       LValue CountLVal = CGF.EmitLValueForField(
9237           DimsLVal, *std::next(RD->field_begin(), CountFD));
9238       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9239       // Stride
9240       LValue StrideLVal = CGF.EmitLValueForField(
9241           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9242       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9243     }
9244     // args[I] = &dims
9245     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9246         DimsAddr, CGM.Int8PtrTy);
9247     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9248         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9249         Info.PointersArray, 0, I);
9250     Address PAddr(P, CGF.getPointerAlign());
9251     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9252     ++L;
9253   }
9254 }
9255 
9256 /// Emit a string constant containing the names of the values mapped to the
9257 /// offloading runtime library.
9258 llvm::Constant *
9259 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9260                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9261   llvm::Constant *SrcLocStr;
9262   if (!MapExprs.getMapDecl()) {
9263     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
9264   } else {
9265     std::string ExprName = "";
9266     if (MapExprs.getMapExpr()) {
9267       PrintingPolicy P(CGF.getContext().getLangOpts());
9268       llvm::raw_string_ostream OS(ExprName);
9269       MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9270       OS.flush();
9271     } else {
9272       ExprName = MapExprs.getMapDecl()->getNameAsString();
9273     }
9274 
9275     SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
9276     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9277     const char *FileName = PLoc.getFilename();
9278     unsigned Line = PLoc.getLine();
9279     unsigned Column = PLoc.getColumn();
9280     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
9281                                                 Line, Column);
9282   }
9283   return SrcLocStr;
9284 }
9285 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference. Fills in, via \p Info: the base-pointer,
/// pointer, size, map-type, map-name, and mapper arrays, plus (optionally)
/// the non-contiguous descriptor.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    // All three pointer arrays (baseptrs, ptrs, mappers) share the same
    // void*[NumberOfPtrs] type and are stack temporaries.
    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      // Sizes array is a stack temporary, filled in the per-pointer loop
      // below.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        // For non-contiguous entries the "size" slot carries the dimension
        // count instead of a byte size.
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      // All sizes are constant: emit them once as a private unnamed_addr
      // global instead of per-call stores.
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // One name string constant per map entry (see emitMappingInformation).
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      // Only pay for a second maptypes global when the end-call types
      // actually differ from the begin-call ones.
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Store each capture's base pointer, pointer, (runtime) size, and mapper
    // into the corresponding array slot.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where a use_device_ptr/addr capture's address lives so the
      // body can be emitted against it.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Sizes only need stores when they are runtime values; the constant
      // case already emitted a global above.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // Non-contiguous descriptors are only needed when there is actual
  // non-contiguous map information and at least one pointer.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9460 
namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// When true, select Info.MapTypesArrayEnd (the map types with the
  /// 'present' modifier cleared) instead of Info.MapTypesArray.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  // Intentionally implicit: callers pass a bare bool for ForEndCall.
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace
9469 
9470 /// Emit the arguments to be passed to the runtime library based on the
9471 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9472 /// ForEndCall, emit map types to be passed for the end of the region instead of
9473 /// the beginning.
9474 static void emitOffloadingArraysArgument(
9475     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9476     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9477     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9478     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9479     const ArgumentsOptions &Options = ArgumentsOptions()) {
9480   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9481          "expected region end call to runtime only when end call is separate");
9482   CodeGenModule &CGM = CGF.CGM;
9483   if (Info.NumberOfPtrs) {
9484     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9485         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9486         Info.BasePointersArray,
9487         /*Idx0=*/0, /*Idx1=*/0);
9488     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9489         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9490         Info.PointersArray,
9491         /*Idx0=*/0,
9492         /*Idx1=*/0);
9493     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9494         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9495         /*Idx0=*/0, /*Idx1=*/0);
9496     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9497         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9498         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9499                                                     : Info.MapTypesArray,
9500         /*Idx0=*/0,
9501         /*Idx1=*/0);
9502 
9503     // Only emit the mapper information arrays if debug information is
9504     // requested.
9505     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9506       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9507     else
9508       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9509           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9510           Info.MapNamesArray,
9511           /*Idx0=*/0,
9512           /*Idx1=*/0);
9513     // If there is no user-defined mapper, set the mapper array to nullptr to
9514     // avoid an unnecessary data privatization
9515     if (!Info.HasMapper)
9516       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9517     else
9518       MappersArrayArg =
9519           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9520   } else {
9521     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9522     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9523     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9524     MapTypesArrayArg =
9525         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9526     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9527     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9528   }
9529 }
9530 
/// Check for inner distribute directive.
/// Returns the nested 'distribute' directive inside a target-style directive
/// \p D (possibly looking through an intermediate 'teams' region), or nullptr
/// if there is none.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  // Peel the captured statement and any wrapping containers to reach the
  // single child statement, if there is exactly one.
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // NOTE(review): qualified via CGOpenMPSIMDRuntime — presumably this
  // resolves to the inherited CGOpenMPRuntime static helper; consider using
  // the base-class name for clarity.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target distribute ...' nested directly.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      // 'target' over 'teams': look one level deeper for the distribute.
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These target forms cannot contain a nested distribute region.
      return nullptr;
    // All remaining directive kinds are not expected as \p D here.
    // NOTE(review): the explicit enumeration plus a 'default:' label is
    // redundant — the default defeats -Wswitch coverage checking of the
    // listed cases.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9639 
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper declaration is emitted at most once.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  // The mapper iterates over elements of type Ty via a restrict-qualified
  // pointer.
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The six parameters
  // mirror the signature shown in the \code block above.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the mapper after the mangled element type and the mapper id.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Allow the mapper body to be optimized even when functions would
  // otherwise be marked optnone.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block. The current element pointer is a PHI between
  // the loop entry and the back edge (added at the bottom of the loop).
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift into the MEMBER_OF bit-field position of the map-type flags.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Name strings are only emitted when debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four decay outcomes into the final map type for this member.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the emitted function; also remember it per current function so
  // deferred bookkeeping (FunctionUDMMap) can find it.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9918 
9919 /// Emit the array initialization or deletion portion for user-defined mapper
9920 /// code generation. First, it evaluates whether an array section is mapped and
9921 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9922 /// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // An array section is detected as having more than one element (Size > 1);
  // a single element does not need a separate init/delete component.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  // Extract the OMP_MAP_DELETE bit from the dynamic map type.
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    // A PTR_AND_OBJ entry whose base differs from begin also needs an explicit
    // allocation component, even if it is not an array section.
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    // Initialization only happens when deletion is NOT requested.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    // Deletion only happens when the delete bit IS set.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
9987 
9988 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9989     const OMPDeclareMapperDecl *D) {
9990   auto I = UDMMap.find(D);
9991   if (I != UDMMap.end())
9992     return I->second;
9993   emitUserDefinedMapper(D);
9994   return UDMMap.lookup(D);
9995 }
9996 
9997 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9998     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9999     llvm::Value *DeviceID,
10000     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10001                                      const OMPLoopDirective &D)>
10002         SizeEmitter) {
10003   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10004   const OMPExecutableDirective *TD = &D;
10005   // Get nested teams distribute kind directive, if any.
10006   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10007     TD = getNestedDistributeDirective(CGM.getContext(), D);
10008   if (!TD)
10009     return;
10010   const auto *LD = cast<OMPLoopDirective>(TD);
10011   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10012                                                          PrePostActionTy &) {
10013     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10014       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10015       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10016       CGF.EmitRuntimeCall(
10017           OMPBuilder.getOrCreateRuntimeFunction(
10018               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10019           Args);
10020     }
10021   };
10022   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10023 }
10024 
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // 'depend' and 'nowait' clauses require the target invocation to be emitted
  // inside an outer task.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Generate the values captured by the target region; they are passed as
  // arguments to the outlined (host fallback) function.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  // Note: ThenGen reads InputInfo/MapTypesArray/MapNamesArray by reference;
  // they are populated later by TargetThenGen before ThenGen runs.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        // The captured variables must be regenerated inside the task region.
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause: let the runtime choose the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    // A nonzero return value means offloading failed and the host fallback
    // (the outlined function) must be invoked instead.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Builds the offloading arrays (base pointers, pointers, sizes, map types,
  // mappers) for all captures and map clauses, then dispatches to ThenGen.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk captures, captured-record fields, and captured values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Publish the generated arrays to the enclosing scope so ThenGen (which
    // captures InputInfo/MapTypesArray/MapNamesArray by reference) can use
    // them.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
10334 
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // Compute the (device, file, line) triple that uniquely identifies this
    // target region's offload entry.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter matching the concrete combined
    // target directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // The remaining directive kinds are not target-execution directives and
    // are unreachable here given the RequiresDeviceCodegen check above.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // For non-target OpenMP directives, only scan the associated statement.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10484 
10485 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10486   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10487       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10488   if (!DevTy)
10489     return false;
10490   // Do not emit device_type(nohost) functions for the host.
10491   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10492     return true;
10493   // Do not emit device_type(host) functions for the device.
10494   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10495     return true;
10496   return false;
10497 }
10498 
10499 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10500   // If emitting code for the host, we do not process FD here. Instead we do
10501   // the normal code generation.
10502   if (!CGM.getLangOpts().OpenMPIsDevice) {
10503     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10504       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10505                                   CGM.getLangOpts().OpenMPIsDevice))
10506         return true;
10507     return false;
10508   }
10509 
10510   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10511   // Try to detect target regions in the function.
10512   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10513     StringRef Name = CGM.getMangledName(GD);
10514     scanForTargetRegionsFunctions(FD->getBody(), Name);
10515     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10516                                 CGM.getLangOpts().OpenMPIsDevice))
10517       return true;
10518   }
10519 
10520   // Do not to emit function if it is not marked as declare target.
10521   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10522          AlreadyEmittedTargetDecls.count(VD) == 0;
10523 }
10524 
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  // Returns true if the variable must NOT be emitted by the regular code
  // generation path right now — either because it is assumed absent, or
  // because its emission is deferred until emitDeferredTargetDecls().
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  // On the host every global variable goes through normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    // 'link' variables — and 'to' variables under unified shared memory —
    // are emitted lazily via emitDeferredTargetDecls().
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}
10562 
10563 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10564                                                    llvm::Constant *Addr) {
10565   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10566       !CGM.getLangOpts().OpenMPIsDevice)
10567     return;
10568 
10569   // If we have host/nohost variables, they do not need to be registered.
10570   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10571       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10572   if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
10573     return;
10574 
10575   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10576       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10577   if (!Res) {
10578     if (CGM.getLangOpts().OpenMPIsDevice) {
10579       // Register non-target variables being emitted in device code (debug info
10580       // may cause this).
10581       StringRef VarName = CGM.getMangledName(VD);
10582       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10583     }
10584     return;
10585   }
10586   // Register declare target variables.
10587   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10588   StringRef VarName;
10589   CharUnits VarSize;
10590   llvm::GlobalValue::LinkageTypes Linkage;
10591 
10592   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10593       !HasRequiresUnifiedSharedMemory) {
10594     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10595     VarName = CGM.getMangledName(VD);
10596     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10597       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10598       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10599     } else {
10600       VarSize = CharUnits::Zero();
10601     }
10602     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10603     // Temp solution to prevent optimizations of the internal variables.
10604     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10605       // Do not create a "ref-variable" if the original is not also available
10606       // on the host.
10607       if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10608         return;
10609       std::string RefName = getName({VarName, "ref"});
10610       if (!CGM.GetGlobalValue(RefName)) {
10611         llvm::Constant *AddrRef =
10612             getOrCreateInternalVariable(Addr->getType(), RefName);
10613         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10614         GVAddrRef->setConstant(/*Val=*/true);
10615         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10616         GVAddrRef->setInitializer(Addr);
10617         CGM.addCompilerUsedGlobal(GVAddrRef);
10618       }
10619     }
10620   } else {
10621     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10622             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10623              HasRequiresUnifiedSharedMemory)) &&
10624            "Declare target attribute must link or to with unified memory.");
10625     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10626       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10627     else
10628       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10629 
10630     if (CGM.getLangOpts().OpenMPIsDevice) {
10631       VarName = Addr->getName();
10632       Addr = nullptr;
10633     } else {
10634       VarName = getAddrOfDeclareTargetVar(VD).getName();
10635       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10636     }
10637     VarSize = CGM.getPointerSize();
10638     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10639   }
10640 
10641   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10642       VarName, Addr, VarSize, Flags, Linkage);
10643 }
10644 
10645 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10646   if (isa<FunctionDecl>(GD.getDecl()) ||
10647       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10648     return emitTargetFunctions(GD);
10649 
10650   return emitTargetGlobalVariable(GD);
10651 }
10652 
10653 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10654   for (const VarDecl *VD : DeferredGlobalVariables) {
10655     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10656         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10657     if (!Res)
10658       continue;
10659     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10660         !HasRequiresUnifiedSharedMemory) {
10661       CGM.EmitGlobal(VD);
10662     } else {
10663       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10664               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10665                HasRequiresUnifiedSharedMemory)) &&
10666              "Expected link clause or to clause with unified memory.");
10667       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10668     }
10669   }
10670 }
10671 
10672 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10673     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10674   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10675          " Expected target-based directive.");
10676 }
10677 
10678 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10679   for (const OMPClause *Clause : D->clauselists()) {
10680     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10681       HasRequiresUnifiedSharedMemory = true;
10682     } else if (const auto *AC =
10683                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10684       switch (AC->getAtomicDefaultMemOrderKind()) {
10685       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10686         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10687         break;
10688       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10689         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10690         break;
10691       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10692         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10693         break;
10694       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10695         break;
10696       }
10697     }
10698   }
10699 }
10700 
// Returns the default atomic ordering, as configured by the
// 'atomic_default_mem_order' clause of '#pragma omp requires' (see
// processRequiresDirective()).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10704 
10705 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10706                                                        LangAS &AS) {
10707   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10708     return false;
10709   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10710   switch(A->getAllocatorType()) {
10711   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10712   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10713   // Not supported, fallback to the default mem space.
10714   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10715   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10716   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10717   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10718   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10719   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10720   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10721     AS = LangAS::Default;
10722     return true;
10723   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10724     llvm_unreachable("Expected predefined allocator for the variables with the "
10725                      "static storage.");
10726   }
10727   return false;
10728 }
10729 
// True if '#pragma omp requires unified_shared_memory' was seen in this
// translation unit (set by processRequiresDirective()).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10733 
10734 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10735     CodeGenModule &CGM)
10736     : CGM(CGM) {
10737   if (CGM.getLangOpts().OpenMPIsDevice) {
10738     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10739     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10740   }
10741 }
10742 
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  // Restore the flag saved by the constructor; the host path never touched it.
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
10747 
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // Returns true if \p GD is already handled and needs no further emission;
  // false the first time a function must actually be emitted on the device.
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // Only consider it handled if a *definition* already exists in the
      // module; a mere declaration still needs emission.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // First sighting of a non-declare-target decl: record it and report
  // "not yet handled"; any later query for the same decl returns true.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}
10767 
// Creates the constructor-like function that registers this translation
// unit's 'requires' flags with the offload runtime via
// __tgt_register_requires. Returns nullptr when no registration is needed.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // The runtime entry takes the flags as a single i64 bitmask.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
10809 
10810 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10811                                     const OMPExecutableDirective &D,
10812                                     SourceLocation Loc,
10813                                     llvm::Function *OutlinedFn,
10814                                     ArrayRef<llvm::Value *> CapturedVars) {
10815   if (!CGF.HaveInsertPoint())
10816     return;
10817 
10818   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10819   CodeGenFunction::RunCleanupsScope Scope(CGF);
10820 
10821   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10822   llvm::Value *Args[] = {
10823       RTLoc,
10824       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10825       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10826   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10827   RealArgs.append(std::begin(Args), std::end(Args));
10828   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10829 
10830   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10831       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10832   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10833 }
10834 
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  // Pushes the 'num_teams'/'thread_limit' clause values to the runtime.
  // A null clause expression is encoded as 0 (let the runtime choose).
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
10863 
// Emits the paired __tgt_target_data_begin_mapper / _end_mapper calls for a
// 'target data' region, evaluating the 'if' and 'device' clauses and
// handling device-pointer privatization (which forces the body to be
// emitted twice: privatized inside the region, unprivatized otherwise).
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
11017 
11018 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
11019     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11020     const Expr *Device) {
11021   if (!CGF.HaveInsertPoint())
11022     return;
11023 
11024   assert((isa<OMPTargetEnterDataDirective>(D) ||
11025           isa<OMPTargetExitDataDirective>(D) ||
11026           isa<OMPTargetUpdateDirective>(D)) &&
11027          "Expecting either target enter, exit data, or update directives.");
11028 
11029   CodeGenFunction::OMPTargetDataInfo InputInfo;
11030   llvm::Value *MapTypesArray = nullptr;
11031   llvm::Value *MapNamesArray = nullptr;
11032   // Generate the code for the opening of the data environment.
11033   auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11034                     &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11035     // Emit device ID if any.
11036     llvm::Value *DeviceID = nullptr;
11037     if (Device) {
11038       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11039                                            CGF.Int64Ty, /*isSigned=*/true);
11040     } else {
11041       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11042     }
11043 
11044     // Emit the number of elements in the offloading arrays.
11045     llvm::Constant *PointerNum =
11046         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11047 
11048     // Source location for the ident struct
11049     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11050 
11051     llvm::Value *OffloadingArgs[] = {RTLoc,
11052                                      DeviceID,
11053                                      PointerNum,
11054                                      InputInfo.BasePointersArray.getPointer(),
11055                                      InputInfo.PointersArray.getPointer(),
11056                                      InputInfo.SizesArray.getPointer(),
11057                                      MapTypesArray,
11058                                      MapNamesArray,
11059                                      InputInfo.MappersArray.getPointer()};
11060 
11061     // Select the right runtime function call for each standalone
11062     // directive.
11063     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11064     RuntimeFunction RTLFn;
11065     switch (D.getDirectiveKind()) {
11066     case OMPD_target_enter_data:
11067       RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11068                         : OMPRTL___tgt_target_data_begin_mapper;
11069       break;
11070     case OMPD_target_exit_data:
11071       RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11072                         : OMPRTL___tgt_target_data_end_mapper;
11073       break;
11074     case OMPD_target_update:
11075       RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11076                         : OMPRTL___tgt_target_data_update_mapper;
11077       break;
11078     case OMPD_parallel:
11079     case OMPD_for:
11080     case OMPD_parallel_for:
11081     case OMPD_parallel_master:
11082     case OMPD_parallel_sections:
11083     case OMPD_for_simd:
11084     case OMPD_parallel_for_simd:
11085     case OMPD_cancel:
11086     case OMPD_cancellation_point:
11087     case OMPD_ordered:
11088     case OMPD_threadprivate:
11089     case OMPD_allocate:
11090     case OMPD_task:
11091     case OMPD_simd:
11092     case OMPD_tile:
11093     case OMPD_unroll:
11094     case OMPD_sections:
11095     case OMPD_section:
11096     case OMPD_single:
11097     case OMPD_master:
11098     case OMPD_critical:
11099     case OMPD_taskyield:
11100     case OMPD_barrier:
11101     case OMPD_taskwait:
11102     case OMPD_taskgroup:
11103     case OMPD_atomic:
11104     case OMPD_flush:
11105     case OMPD_depobj:
11106     case OMPD_scan:
11107     case OMPD_teams:
11108     case OMPD_target_data:
11109     case OMPD_distribute:
11110     case OMPD_distribute_simd:
11111     case OMPD_distribute_parallel_for:
11112     case OMPD_distribute_parallel_for_simd:
11113     case OMPD_teams_distribute:
11114     case OMPD_teams_distribute_simd:
11115     case OMPD_teams_distribute_parallel_for:
11116     case OMPD_teams_distribute_parallel_for_simd:
11117     case OMPD_declare_simd:
11118     case OMPD_declare_variant:
11119     case OMPD_begin_declare_variant:
11120     case OMPD_end_declare_variant:
11121     case OMPD_declare_target:
11122     case OMPD_end_declare_target:
11123     case OMPD_declare_reduction:
11124     case OMPD_declare_mapper:
11125     case OMPD_taskloop:
11126     case OMPD_taskloop_simd:
11127     case OMPD_master_taskloop:
11128     case OMPD_master_taskloop_simd:
11129     case OMPD_parallel_master_taskloop:
11130     case OMPD_parallel_master_taskloop_simd:
11131     case OMPD_target:
11132     case OMPD_target_simd:
11133     case OMPD_target_teams_distribute:
11134     case OMPD_target_teams_distribute_simd:
11135     case OMPD_target_teams_distribute_parallel_for:
11136     case OMPD_target_teams_distribute_parallel_for_simd:
11137     case OMPD_target_teams:
11138     case OMPD_target_parallel:
11139     case OMPD_target_parallel_for:
11140     case OMPD_target_parallel_for_simd:
11141     case OMPD_requires:
11142     case OMPD_unknown:
11143     default:
11144       llvm_unreachable("Unexpected standalone target data directive.");
11145       break;
11146     }
11147     CGF.EmitRuntimeCall(
11148         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11149         OffloadingArgs);
11150   };
11151 
11152   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11153                           &MapNamesArray](CodeGenFunction &CGF,
11154                                           PrePostActionTy &) {
11155     // Fill up the arrays with all the mapped variables.
11156     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11157 
11158     // Get map clause information.
11159     MappableExprsHandler MEHandler(D, CGF);
11160     MEHandler.generateAllInfo(CombinedInfo);
11161 
11162     TargetDataInfo Info;
11163     // Fill up the arrays and create the arguments.
11164     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11165                          /*IsNonContiguous=*/true);
11166     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11167                              D.hasClausesOfKind<OMPNowaitClause>();
11168     emitOffloadingArraysArgument(
11169         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
11170         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
11171         {/*ForEndTask=*/false});
11172     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11173     InputInfo.BasePointersArray =
11174         Address(Info.BasePointersArray, CGM.getPointerAlign());
11175     InputInfo.PointersArray =
11176         Address(Info.PointersArray, CGM.getPointerAlign());
11177     InputInfo.SizesArray =
11178         Address(Info.SizesArray, CGM.getPointerAlign());
11179     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
11180     MapTypesArray = Info.MapTypesArray;
11181     MapNamesArray = Info.MapNamesArray;
11182     if (RequiresOuterTask)
11183       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11184     else
11185       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11186   };
11187 
11188   if (IfCond) {
11189     emitIfClause(CGF, IfCond, TargetThenGen,
11190                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
11191   } else {
11192     RegionCodeGenTy ThenRCG(TargetThenGen);
11193     ThenRCG(CGF);
11194   }
11195 }
11196 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; plain vector parameter by default.
    ParamKindTy Kind = Vector;
    /// Presumably the linear stride or the uniform/linear clause argument
    /// value — only Kind is read in this file chunk; confirm at other uses.
    llvm::APSInt StrideOrArg;
    /// Presumably the value of an 'aligned' clause — confirm at other uses.
    llvm::APSInt Alignment;
  };
} // namespace
11207 
11208 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11209                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11210   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11211   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11212   // of that clause. The VLEN value must be power of 2.
11213   // In other case the notion of the function`s "characteristic data type" (CDT)
11214   // is used to compute the vector length.
11215   // CDT is defined in the following order:
11216   //   a) For non-void function, the CDT is the return type.
11217   //   b) If the function has any non-uniform, non-linear parameters, then the
11218   //   CDT is the type of the first such parameter.
11219   //   c) If the CDT determined by a) or b) above is struct, union, or class
11220   //   type which is pass-by-value (except for the type that maps to the
11221   //   built-in complex data type), the characteristic data type is int.
11222   //   d) If none of the above three cases is applicable, the CDT is int.
11223   // The VLEN is then determined based on the CDT and the size of vector
11224   // register of that ISA for which current vector version is generated. The
11225   // VLEN is computed using the formula below:
11226   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11227   // where vector register size specified in section 3.2.1 Registers and the
11228   // Stack Frame of original AMD64 ABI document.
11229   QualType RetType = FD->getReturnType();
11230   if (RetType.isNull())
11231     return 0;
11232   ASTContext &C = FD->getASTContext();
11233   QualType CDT;
11234   if (!RetType.isNull() && !RetType->isVoidType()) {
11235     CDT = RetType;
11236   } else {
11237     unsigned Offset = 0;
11238     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11239       if (ParamAttrs[Offset].Kind == Vector)
11240         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11241       ++Offset;
11242     }
11243     if (CDT.isNull()) {
11244       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11245         if (ParamAttrs[I + Offset].Kind == Vector) {
11246           CDT = FD->getParamDecl(I)->getType();
11247           break;
11248         }
11249       }
11250     }
11251   }
11252   if (CDT.isNull())
11253     CDT = C.IntTy;
11254   CDT = CDT->getCanonicalTypeUnqualified();
11255   if (CDT->isRecordType() || CDT->isUnionType())
11256     CDT = C.IntTy;
11257   return C.getTypeSize(CDT);
11258 }
11259 
11260 static void
11261 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11262                            const llvm::APSInt &VLENVal,
11263                            ArrayRef<ParamAttrTy> ParamAttrs,
11264                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11265   struct ISADataTy {
11266     char ISA;
11267     unsigned VecRegSize;
11268   };
11269   ISADataTy ISAData[] = {
11270       {
11271           'b', 128
11272       }, // SSE
11273       {
11274           'c', 256
11275       }, // AVX
11276       {
11277           'd', 256
11278       }, // AVX2
11279       {
11280           'e', 512
11281       }, // AVX512
11282   };
11283   llvm::SmallVector<char, 2> Masked;
11284   switch (State) {
11285   case OMPDeclareSimdDeclAttr::BS_Undefined:
11286     Masked.push_back('N');
11287     Masked.push_back('M');
11288     break;
11289   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11290     Masked.push_back('N');
11291     break;
11292   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11293     Masked.push_back('M');
11294     break;
11295   }
11296   for (char Mask : Masked) {
11297     for (const ISADataTy &Data : ISAData) {
11298       SmallString<256> Buffer;
11299       llvm::raw_svector_ostream Out(Buffer);
11300       Out << "_ZGV" << Data.ISA << Mask;
11301       if (!VLENVal) {
11302         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11303         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11304         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11305       } else {
11306         Out << VLENVal;
11307       }
11308       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11309         switch (ParamAttr.Kind){
11310         case LinearWithVarStride:
11311           Out << 's' << ParamAttr.StrideOrArg;
11312           break;
11313         case Linear:
11314           Out << 'l';
11315           if (ParamAttr.StrideOrArg != 1)
11316             Out << ParamAttr.StrideOrArg;
11317           break;
11318         case Uniform:
11319           Out << 'u';
11320           break;
11321         case Vector:
11322           Out << 'v';
11323           break;
11324         }
11325         if (!!ParamAttr.Alignment)
11326           Out << 'a' << ParamAttr.Alignment;
11327       }
11328       Out << '_' << Fn->getName();
11329       Fn->addFnAttr(Out.str());
11330     }
11331   }
11332 }
11333 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11339 
11340 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11341 ///
11342 /// TODO: Need to implement the behavior for reference marked with a
11343 /// var or no linear modifiers (1.b in the section). For this, we
11344 /// need to extend ParamKindTy to support the linear modifiers.
11345 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11346   QT = QT.getCanonicalType();
11347 
11348   if (QT->isVoidType())
11349     return false;
11350 
11351   if (Kind == ParamKindTy::Uniform)
11352     return false;
11353 
11354   if (Kind == ParamKindTy::Linear)
11355     return false;
11356 
11357   // TODO: Handle linear references with modifiers
11358 
11359   if (Kind == ParamKindTy::LinearWithVarStride)
11360     return false;
11361 
11362   return true;
11363 }
11364 
11365 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11366 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11367   QT = QT.getCanonicalType();
11368   unsigned Size = C.getTypeSize(QT);
11369 
11370   // Only scalars and complex within 16 bytes wide set PVB to true.
11371   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11372     return false;
11373 
11374   if (QT->isFloatingType())
11375     return true;
11376 
11377   if (QT->isIntegerType())
11378     return true;
11379 
11380   if (QT->isPointerType())
11381     return true;
11382 
11383   // TODO: Add support for complex types (section 3.1.2, item 2).
11384 
11385   return false;
11386 }
11387 
11388 /// Computes the lane size (LS) of a return type or of an input parameter,
11389 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11390 /// TODO: Add support for references, section 3.2.1, item 1.
11391 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11392   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11393     QualType PTy = QT.getCanonicalType()->getPointeeType();
11394     if (getAArch64PBV(PTy, C))
11395       return C.getTypeSize(PTy);
11396   }
11397   if (getAArch64PBV(QT, C))
11398     return C.getTypeSize(QT);
11399 
11400   return C.getTypeSize(C.getUIntPtrType());
11401 }
11402 
11403 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11404 // signature of the scalar function, as defined in 3.2.2 of the
11405 // AAVFABI.
11406 static std::tuple<unsigned, unsigned, bool>
11407 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11408   QualType RetType = FD->getReturnType().getCanonicalType();
11409 
11410   ASTContext &C = FD->getASTContext();
11411 
11412   bool OutputBecomesInput = false;
11413 
11414   llvm::SmallVector<unsigned, 8> Sizes;
11415   if (!RetType->isVoidType()) {
11416     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11417     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11418       OutputBecomesInput = true;
11419   }
11420   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11421     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11422     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11423   }
11424 
11425   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11426   // The LS of a function parameter / return value can only be a power
11427   // of 2, starting from 8 bits, up to 128.
11428   assert(std::all_of(Sizes.begin(), Sizes.end(),
11429                      [](unsigned Size) {
11430                        return Size == 8 || Size == 16 || Size == 32 ||
11431                               Size == 64 || Size == 128;
11432                      }) &&
11433          "Invalid size");
11434 
11435   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11436                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11437                          OutputBecomesInput);
11438 }
11439 
11440 /// Mangle the parameter part of the vector function name according to
11441 /// their OpenMP classification. The mangling function is defined in
11442 /// section 3.5 of the AAVFABI.
11443 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11444   SmallString<256> Buffer;
11445   llvm::raw_svector_ostream Out(Buffer);
11446   for (const auto &ParamAttr : ParamAttrs) {
11447     switch (ParamAttr.Kind) {
11448     case LinearWithVarStride:
11449       Out << "ls" << ParamAttr.StrideOrArg;
11450       break;
11451     case Linear:
11452       Out << 'l';
11453       // Don't print the step value if it is not present or if it is
11454       // equal to 1.
11455       if (ParamAttr.StrideOrArg != 1)
11456         Out << ParamAttr.StrideOrArg;
11457       break;
11458     case Uniform:
11459       Out << 'u';
11460       break;
11461     case Vector:
11462       Out << 'v';
11463       break;
11464     }
11465 
11466     if (!!ParamAttr.Alignment)
11467       Out << 'a' << ParamAttr.Alignment;
11468   }
11469 
11470   return std::string(Out.str());
11471 }
11472 
11473 // Function used to add the attribute. The parameter `VLEN` is
11474 // templated to allow the use of "x" when targeting scalable functions
11475 // for SVE.
11476 template <typename T>
11477 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11478                                  char ISA, StringRef ParSeq,
11479                                  StringRef MangledName, bool OutputBecomesInput,
11480                                  llvm::Function *Fn) {
11481   SmallString<256> Buffer;
11482   llvm::raw_svector_ostream Out(Buffer);
11483   Out << Prefix << ISA << LMask << VLEN;
11484   if (OutputBecomesInput)
11485     Out << "v";
11486   Out << ParSeq << "_" << MangledName;
11487   Fn->addFnAttr(Out.str());
11488 }
11489 
11490 // Helper function to generate the Advanced SIMD names depending on
11491 // the value of the NDS when simdlen is not present.
11492 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11493                                       StringRef Prefix, char ISA,
11494                                       StringRef ParSeq, StringRef MangledName,
11495                                       bool OutputBecomesInput,
11496                                       llvm::Function *Fn) {
11497   switch (NDS) {
11498   case 8:
11499     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11500                          OutputBecomesInput, Fn);
11501     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11502                          OutputBecomesInput, Fn);
11503     break;
11504   case 16:
11505     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11506                          OutputBecomesInput, Fn);
11507     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11508                          OutputBecomesInput, Fn);
11509     break;
11510   case 32:
11511     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11512                          OutputBecomesInput, Fn);
11513     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11514                          OutputBecomesInput, Fn);
11515     break;
11516   case 64:
11517   case 128:
11518     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11519                          OutputBecomesInput, Fn);
11520     break;
11521   default:
11522     llvm_unreachable("Scalar type is too wide.");
11523   }
11524 }
11525 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// \p ISA selects the target extension ('s' for SVE, 'n' for Advanced
/// SIMD); \p UserVLEN is the simdlen value, or 0 when the clause is
/// absent. Invalid user input produces a warning and emits nothing.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits: total width (UserVLEN * WDS) in [128, 2048] bits, in
  // multiples of 128.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        // No clause: emit both the unmasked ('N') and masked ('M')
        // variants.
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable vector length, mangled
      // with "x".
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`. The vector lengths are derived from the NDS.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11634 
11635 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11636                                               llvm::Function *Fn) {
11637   ASTContext &C = CGM.getContext();
11638   FD = FD->getMostRecentDecl();
11639   // Map params to their positions in function decl.
11640   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11641   if (isa<CXXMethodDecl>(FD))
11642     ParamPositions.try_emplace(FD, 0);
11643   unsigned ParamPos = ParamPositions.size();
11644   for (const ParmVarDecl *P : FD->parameters()) {
11645     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11646     ++ParamPos;
11647   }
11648   while (FD) {
11649     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11650       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11651       // Mark uniform parameters.
11652       for (const Expr *E : Attr->uniforms()) {
11653         E = E->IgnoreParenImpCasts();
11654         unsigned Pos;
11655         if (isa<CXXThisExpr>(E)) {
11656           Pos = ParamPositions[FD];
11657         } else {
11658           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11659                                 ->getCanonicalDecl();
11660           Pos = ParamPositions[PVD];
11661         }
11662         ParamAttrs[Pos].Kind = Uniform;
11663       }
11664       // Get alignment info.
11665       auto NI = Attr->alignments_begin();
11666       for (const Expr *E : Attr->aligneds()) {
11667         E = E->IgnoreParenImpCasts();
11668         unsigned Pos;
11669         QualType ParmTy;
11670         if (isa<CXXThisExpr>(E)) {
11671           Pos = ParamPositions[FD];
11672           ParmTy = E->getType();
11673         } else {
11674           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11675                                 ->getCanonicalDecl();
11676           Pos = ParamPositions[PVD];
11677           ParmTy = PVD->getType();
11678         }
11679         ParamAttrs[Pos].Alignment =
11680             (*NI)
11681                 ? (*NI)->EvaluateKnownConstInt(C)
11682                 : llvm::APSInt::getUnsigned(
11683                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11684                           .getQuantity());
11685         ++NI;
11686       }
11687       // Mark linear parameters.
11688       auto SI = Attr->steps_begin();
11689       auto MI = Attr->modifiers_begin();
11690       for (const Expr *E : Attr->linears()) {
11691         E = E->IgnoreParenImpCasts();
11692         unsigned Pos;
11693         // Rescaling factor needed to compute the linear parameter
11694         // value in the mangled name.
11695         unsigned PtrRescalingFactor = 1;
11696         if (isa<CXXThisExpr>(E)) {
11697           Pos = ParamPositions[FD];
11698         } else {
11699           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11700                                 ->getCanonicalDecl();
11701           Pos = ParamPositions[PVD];
11702           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11703             PtrRescalingFactor = CGM.getContext()
11704                                      .getTypeSizeInChars(P->getPointeeType())
11705                                      .getQuantity();
11706         }
11707         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11708         ParamAttr.Kind = Linear;
11709         // Assuming a stride of 1, for `linear` without modifiers.
11710         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11711         if (*SI) {
11712           Expr::EvalResult Result;
11713           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11714             if (const auto *DRE =
11715                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11716               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11717                 ParamAttr.Kind = LinearWithVarStride;
11718                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11719                     ParamPositions[StridePVD->getCanonicalDecl()]);
11720               }
11721             }
11722           } else {
11723             ParamAttr.StrideOrArg = Result.Val.getInt();
11724           }
11725         }
11726         // If we are using a linear clause on a pointer, we need to
11727         // rescale the value of linear_step with the byte size of the
11728         // pointee type.
11729         if (Linear == ParamAttr.Kind)
11730           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11731         ++SI;
11732         ++MI;
11733       }
11734       llvm::APSInt VLENVal;
11735       SourceLocation ExprLoc;
11736       const Expr *VLENExpr = Attr->getSimdlen();
11737       if (VLENExpr) {
11738         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11739         ExprLoc = VLENExpr->getExprLoc();
11740       }
11741       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11742       if (CGM.getTriple().isX86()) {
11743         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11744       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11745         unsigned VLEN = VLENVal.getExtValue();
11746         StringRef MangledName = Fn->getName();
11747         if (CGM.getTarget().hasFeature("sve"))
11748           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11749                                          MangledName, 's', 128, Fn, ExprLoc);
11750         if (CGM.getTarget().hasFeature("neon"))
11751           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11752                                          MangledName, 'n', 128, Fn, ExprLoc);
11753       }
11754     }
11755     FD = FD->getPreviousDecl();
11756   }
11757 }
11758 
namespace {
/// Cleanup action for doacross support.
/// Pushed onto the EH stack by emitDoacrossInit so the doacross
/// finalization runtime call runs on both normal and exceptional exit.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  /// Number of arguments the finalization call takes (loc, gtid).
  static const int DoacrossFinArgs = 2;

private:
  /// The __kmpc_doacross_fini runtime entry to invoke.
  llvm::FunctionCallee RTLFn;
  /// Captured call arguments, copied at construction time.
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // No insertion point means this cleanup path is unreachable.
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace
11783 
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Build (once per module) and cache the kmp_dim record type.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim entry per associated loop.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize the dims array; 'lo' thus stays 0 for every
  // dimension and only 'up' and 'st' are stored explicitly below.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register a cleanup so __kmpc_doacross_fini is emitted at region
  // exit, including the exceptional path.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
11854 
11855 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11856                                           const OMPDependClause *C) {
11857   QualType Int64Ty =
11858       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11859   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11860   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11861       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11862   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11863   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11864     const Expr *CounterVal = C->getLoopData(I);
11865     assert(CounterVal);
11866     llvm::Value *CntVal = CGF.EmitScalarConversion(
11867         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11868         CounterVal->getExprLoc());
11869     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11870                           /*Volatile=*/false, Int64Ty);
11871   }
11872   llvm::Value *Args[] = {
11873       emitUpdateLocation(CGF, C->getBeginLoc()),
11874       getThreadID(CGF, C->getBeginLoc()),
11875       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11876   llvm::FunctionCallee RTLFn;
11877   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11878     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11879                                                   OMPRTL___kmpc_doacross_post);
11880   } else {
11881     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11882     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11883                                                   OMPRTL___kmpc_doacross_wait);
11884   }
11885   CGF.EmitRuntimeCall(RTLFn, Args);
11886 }
11887 
11888 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11889                                llvm::FunctionCallee Callee,
11890                                ArrayRef<llvm::Value *> Args) const {
11891   assert(Loc.isValid() && "Outlined function call location must be valid.");
11892   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11893 
11894   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11895     if (Fn->doesNotThrow()) {
11896       CGF.EmitNounwindRuntimeCall(Fn, Args);
11897       return;
11898     }
11899   }
11900   CGF.EmitRuntimeCall(Callee, Args);
11901 }
11902 
// Default lowering of a call to an outlined OpenMP function: simply
// forward to emitCall. Subclasses may override to adjust arguments.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11908 
11909 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11910   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11911     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11912       HasEmittedDeclareTargetRegion = true;
11913 }
11914 
// Base implementation: TargetParam is ignored and the address of the
// native parameter's local storage is returned unchanged.
// NOTE(review): presumably overridden by device runtimes when native
// and target parameters differ — confirm against subclasses.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11920 
11921 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11922                                                    const VarDecl *VD) {
11923   if (!VD)
11924     return Address::invalid();
11925   Address UntiedAddr = Address::invalid();
11926   Address UntiedRealAddr = Address::invalid();
11927   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11928   if (It != FunctionToUntiedTaskStackMap.end()) {
11929     const UntiedLocalVarsAddressesMap &UntiedData =
11930         UntiedLocalVarsStack[It->second];
11931     auto I = UntiedData.find(VD);
11932     if (I != UntiedData.end()) {
11933       UntiedAddr = I->second.first;
11934       UntiedRealAddr = I->second.second;
11935     }
11936   }
11937   const VarDecl *CVD = VD->getCanonicalDecl();
11938   if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11939     // Use the default allocation.
11940     if (!isAllocatableDecl(VD))
11941       return UntiedAddr;
11942     llvm::Value *Size;
11943     CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11944     if (CVD->getType()->isVariablyModifiedType()) {
11945       Size = CGF.getTypeSize(CVD->getType());
11946       // Align the size: ((size + align - 1) / align) * align
11947       Size = CGF.Builder.CreateNUWAdd(
11948           Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11949       Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11950       Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11951     } else {
11952       CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11953       Size = CGM.getSize(Sz.alignTo(Align));
11954     }
11955     llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11956     const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11957     assert(AA->getAllocator() &&
11958            "Expected allocator expression for non-default allocator.");
11959     llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
11960     // According to the standard, the original allocator type is a enum
11961     // (integer). Convert to pointer type, if required.
11962     Allocator = CGF.EmitScalarConversion(
11963         Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
11964         AA->getAllocator()->getExprLoc());
11965     llvm::Value *Args[] = {ThreadID, Size, Allocator};
11966 
11967     llvm::Value *Addr =
11968         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11969                                 CGM.getModule(), OMPRTL___kmpc_alloc),
11970                             Args, getName({CVD->getName(), ".void.addr"}));
11971     llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11972         CGM.getModule(), OMPRTL___kmpc_free);
11973     QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11974     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11975         Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11976     if (UntiedAddr.isValid())
11977       CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11978 
11979     // Cleanup action for allocate support.
11980     class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11981       llvm::FunctionCallee RTLFn;
11982       unsigned LocEncoding;
11983       Address Addr;
11984       const Expr *Allocator;
11985 
11986     public:
11987       OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
11988                            Address Addr, const Expr *Allocator)
11989           : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11990             Allocator(Allocator) {}
11991       void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11992         if (!CGF.HaveInsertPoint())
11993           return;
11994         llvm::Value *Args[3];
11995         Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11996             CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11997         Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11998             Addr.getPointer(), CGF.VoidPtrTy);
11999         llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
12000         // According to the standard, the original allocator type is a enum
12001         // (integer). Convert to pointer type, if required.
12002         AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12003                                             CGF.getContext().VoidPtrTy,
12004                                             Allocator->getExprLoc());
12005         Args[2] = AllocVal;
12006 
12007         CGF.EmitRuntimeCall(RTLFn, Args);
12008       }
12009     };
12010     Address VDAddr =
12011         UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
12012     CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12013         NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12014         VDAddr, AA->getAllocator());
12015     if (UntiedRealAddr.isValid())
12016       if (auto *Region =
12017               dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12018         Region->emitUntiedSwitch(CGF);
12019     return VDAddr;
12020   }
12021   return UntiedAddr;
12022 }
12023 
12024 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12025                                              const VarDecl *VD) const {
12026   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12027   if (It == FunctionToUntiedTaskStackMap.end())
12028     return false;
12029   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12030 }
12031 
12032 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12033     CodeGenModule &CGM, const OMPLoopDirective &S)
12034     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12035   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12036   if (!NeedToPush)
12037     return;
12038   NontemporalDeclsSet &DS =
12039       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12040   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12041     for (const Stmt *Ref : C->private_refs()) {
12042       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12043       const ValueDecl *VD;
12044       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12045         VD = DRE->getDecl();
12046       } else {
12047         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12048         assert((ME->isImplicitCXXThis() ||
12049                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12050                "Expected member of current class.");
12051         VD = ME->getMemberDecl();
12052       }
12053       DS.insert(VD);
12054     }
12055   }
12056 }
12057 
12058 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12059   if (!NeedToPush)
12060     return;
12061   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12062 }
12063 
12064 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12065     CodeGenFunction &CGF,
12066     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12067                           std::pair<Address, Address>> &LocalVars)
12068     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12069   if (!NeedToPush)
12070     return;
12071   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12072       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12073   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12074 }
12075 
12076 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12077   if (!NeedToPush)
12078     return;
12079   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12080 }
12081 
12082 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12083   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12084 
12085   return llvm::any_of(
12086       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12087       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
12088 }
12089 
12090 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12091     const OMPExecutableDirective &S,
12092     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12093     const {
12094   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12095   // Vars in target/task regions must be excluded completely.
12096   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12097       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12098     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12099     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12100     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12101     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12102       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12103         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12104     }
12105   }
12106   // Exclude vars in private clauses.
12107   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12108     for (const Expr *Ref : C->varlists()) {
12109       if (!Ref->getType()->isScalarType())
12110         continue;
12111       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12112       if (!DRE)
12113         continue;
12114       NeedToCheckForLPCs.insert(DRE->getDecl());
12115     }
12116   }
12117   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12118     for (const Expr *Ref : C->varlists()) {
12119       if (!Ref->getType()->isScalarType())
12120         continue;
12121       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12122       if (!DRE)
12123         continue;
12124       NeedToCheckForLPCs.insert(DRE->getDecl());
12125     }
12126   }
12127   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12128     for (const Expr *Ref : C->varlists()) {
12129       if (!Ref->getType()->isScalarType())
12130         continue;
12131       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12132       if (!DRE)
12133         continue;
12134       NeedToCheckForLPCs.insert(DRE->getDecl());
12135     }
12136   }
12137   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12138     for (const Expr *Ref : C->varlists()) {
12139       if (!Ref->getType()->isScalarType())
12140         continue;
12141       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12142       if (!DRE)
12143         continue;
12144       NeedToCheckForLPCs.insert(DRE->getDecl());
12145     }
12146   }
12147   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12148     for (const Expr *Ref : C->varlists()) {
12149       if (!Ref->getType()->isScalarType())
12150         continue;
12151       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12152       if (!DRE)
12153         continue;
12154       NeedToCheckForLPCs.insert(DRE->getDecl());
12155     }
12156   }
12157   for (const Decl *VD : NeedToCheckForLPCs) {
12158     for (const LastprivateConditionalData &Data :
12159          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12160       if (Data.DeclToUniqueName.count(VD) > 0) {
12161         if (!Data.Disabled)
12162           NeedToAddForLPCsAsDisabled.insert(VD);
12163         break;
12164       }
12165     }
12166   }
12167 }
12168 
12169 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12170     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12171     : CGM(CGF.CGM),
12172       Action((CGM.getLangOpts().OpenMP >= 50 &&
12173               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12174                            [](const OMPLastprivateClause *C) {
12175                              return C->getKind() ==
12176                                     OMPC_LASTPRIVATE_conditional;
12177                            }))
12178                  ? ActionToDo::PushAsLastprivateConditional
12179                  : ActionToDo::DoNotPush) {
12180   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12181   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12182     return;
12183   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12184          "Expected a push action.");
12185   LastprivateConditionalData &Data =
12186       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12187   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12188     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12189       continue;
12190 
12191     for (const Expr *Ref : C->varlists()) {
12192       Data.DeclToUniqueName.insert(std::make_pair(
12193           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12194           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12195     }
12196   }
12197   Data.IVLVal = IVLVal;
12198   Data.Fn = CGF.CurFn;
12199 }
12200 
12201 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12202     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12203     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12204   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12205   if (CGM.getLangOpts().OpenMP < 50)
12206     return;
12207   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12208   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12209   if (!NeedToAddForLPCsAsDisabled.empty()) {
12210     Action = ActionToDo::DisableLastprivateConditional;
12211     LastprivateConditionalData &Data =
12212         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12213     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12214       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12215     Data.Fn = CGF.CurFn;
12216     Data.Disabled = true;
12217   }
12218 }
12219 
/// Factory wrapper: builds an RAII object through the two-argument
/// constructor, which (for OpenMP >= 5.0) pushes a 'Disabled' scope for the
/// declarations computed by tryToDisableInnerAnalysis.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12225 
12226 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12227   if (CGM.getLangOpts().OpenMP < 50)
12228     return;
12229   if (Action == ActionToDo::DisableLastprivateConditional) {
12230     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12231            "Expected list of disabled private vars.");
12232     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12233   }
12234   if (Action == ActionToDo::PushAsLastprivateConditional) {
12235     assert(
12236         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12237         "Expected list of lastprivate conditional vars.");
12238     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12239   }
12240 }
12241 
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  // Get (or lazily create) the per-function map VD -> (record type, value
  // field, Fired field, base lvalue).
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use of VD in this function: build an implicit record holding the
    // private value plus a char 'Fired' flag, allocate a temp for it, and
    // cache all the pieces. (The "lasprivate" spelling below is an internal
    // tag name only.)
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Reuse the previously created record/temporary for this variable.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0: nothing has been stored to the conditional lastprivate yet.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  // The address of the value field serves as the variable's private copy.
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12276 
12277 namespace {
12278 /// Checks if the lastprivate conditional variable is referenced in LHS.
12279 class LastprivateConditionalRefChecker final
12280     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12281   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12282   const Expr *FoundE = nullptr;
12283   const Decl *FoundD = nullptr;
12284   StringRef UniqueDeclName;
12285   LValue IVLVal;
12286   llvm::Function *FoundFn = nullptr;
12287   SourceLocation Loc;
12288 
12289 public:
12290   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12291     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12292          llvm::reverse(LPM)) {
12293       auto It = D.DeclToUniqueName.find(E->getDecl());
12294       if (It == D.DeclToUniqueName.end())
12295         continue;
12296       if (D.Disabled)
12297         return false;
12298       FoundE = E;
12299       FoundD = E->getDecl()->getCanonicalDecl();
12300       UniqueDeclName = It->second;
12301       IVLVal = D.IVLVal;
12302       FoundFn = D.Fn;
12303       break;
12304     }
12305     return FoundE == E;
12306   }
12307   bool VisitMemberExpr(const MemberExpr *E) {
12308     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12309       return false;
12310     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12311          llvm::reverse(LPM)) {
12312       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12313       if (It == D.DeclToUniqueName.end())
12314         continue;
12315       if (D.Disabled)
12316         return false;
12317       FoundE = E;
12318       FoundD = E->getMemberDecl()->getCanonicalDecl();
12319       UniqueDeclName = It->second;
12320       IVLVal = D.IVLVal;
12321       FoundFn = D.Fn;
12322       break;
12323     }
12324     return FoundE == E;
12325   }
12326   bool VisitStmt(const Stmt *S) {
12327     for (const Stmt *Child : S->children()) {
12328       if (!Child)
12329         continue;
12330       if (const auto *E = dyn_cast<Expr>(Child))
12331         if (!E->isGLValue())
12332           continue;
12333       if (Visit(Child))
12334         return true;
12335     }
12336     return false;
12337   }
12338   explicit LastprivateConditionalRefChecker(
12339       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12340       : LPM(LPM) {}
12341   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12342   getFoundData() const {
12343     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12344   }
12345 };
12346 } // namespace
12347 
// Emits "if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }" against two
// internal globals (<name>.iv and <name>) so that, across threads, the value
// from the lexically-last updated iteration wins. The compare-and-store runs
// inside a named critical region unless compiling in simd-only mode.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12434 
12435 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12436                                                          const Expr *LHS) {
12437   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12438     return;
12439   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12440   if (!Checker.Visit(LHS))
12441     return;
12442   const Expr *FoundE;
12443   const Decl *FoundD;
12444   StringRef UniqueDeclName;
12445   LValue IVLVal;
12446   llvm::Function *FoundFn;
12447   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12448       Checker.getFoundData();
12449   if (FoundFn != CGF.CurFn) {
12450     // Special codegen for inner parallel regions.
12451     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12452     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12453     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12454            "Lastprivate conditional is not found in outer region.");
12455     QualType StructTy = std::get<0>(It->getSecond());
12456     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12457     LValue PrivLVal = CGF.EmitLValue(FoundE);
12458     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12459         PrivLVal.getAddress(CGF),
12460         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12461     LValue BaseLVal =
12462         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12463     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12464     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12465                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12466                         FiredLVal, llvm::AtomicOrdering::Unordered,
12467                         /*IsVolatile=*/true, /*isInit=*/false);
12468     return;
12469   }
12470 
12471   // Private address of the lastprivate conditional in the current context.
12472   // priv_a
12473   LValue LVal = CGF.EmitLValue(FoundE);
12474   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12475                                    FoundE->getExprLoc());
12476 }
12477 
12478 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12479     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12480     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12481   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12482     return;
12483   auto Range = llvm::reverse(LastprivateConditionalStack);
12484   auto It = llvm::find_if(
12485       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12486   if (It == Range.end() || It->Fn != CGF.CurFn)
12487     return;
12488   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12489   assert(LPCI != LastprivateConditionalToTypes.end() &&
12490          "Lastprivates must be registered already.");
12491   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12492   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12493   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12494   for (const auto &Pair : It->DeclToUniqueName) {
12495     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12496     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12497       continue;
12498     auto I = LPCI->getSecond().find(Pair.first);
12499     assert(I != LPCI->getSecond().end() &&
12500            "Lastprivate must be rehistered already.");
12501     // bool Cmp = priv_a.Fired != 0;
12502     LValue BaseLVal = std::get<3>(I->getSecond());
12503     LValue FiredLVal =
12504         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12505     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12506     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12507     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12508     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12509     // if (Cmp) {
12510     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12511     CGF.EmitBlock(ThenBB);
12512     Address Addr = CGF.GetAddrOfLocalVar(VD);
12513     LValue LVal;
12514     if (VD->getType()->isReferenceType())
12515       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12516                                            AlignmentSource::Decl);
12517     else
12518       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12519                                 AlignmentSource::Decl);
12520     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12521                                      D.getBeginLoc());
12522     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12523     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12524     // }
12525   }
12526 }
12527 
12528 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12529     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12530     SourceLocation Loc) {
12531   if (CGF.getLangOpts().OpenMP < 50)
12532     return;
12533   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12534   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12535          "Unknown lastprivate conditional variable.");
12536   StringRef UniqueName = It->second;
12537   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12538   // The variable was not updated in the region - exit.
12539   if (!GV)
12540     return;
12541   LValue LPLVal = CGF.MakeAddrLValue(
12542       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12543   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12544   CGF.EmitStoreOfScalar(Res, PrivLVal);
12545 }
12546 
// SIMD-only mode: no 'parallel' outlining is ever requested, so this is
// unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12552 
// SIMD-only mode: no 'teams' outlining is ever requested, so this is
// unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12558 
// SIMD-only mode: task outlining is never requested, so this is unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12566 
// SIMD-only mode: parallel calls are never emitted, so this is unreachable.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12574 
// SIMD-only mode: 'critical' regions are never emitted, so this is
// unreachable.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12581 
// SIMD-only mode: 'master' regions are never emitted, so this is unreachable.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12587 
// SIMD-only mode: 'masked' regions are never emitted, so this is unreachable.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12594 
// SIMD-only mode: 'taskyield' is never emitted, so this is unreachable.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12599 
// SIMD-only mode: 'taskgroup' regions are never emitted, so this is
// unreachable.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12605 
// SIMD-only mode: 'single' regions are never emitted, so this is unreachable.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12613 
// SIMD-only mode: 'ordered' regions are never emitted, so this is
// unreachable.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12620 
// SIMD-only mode: runtime barriers are never emitted, so this is unreachable.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12628 
// SIMD-only mode: dynamic worksharing-loop init is never emitted, so this is
// unreachable.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12635 
// SIMD-only mode: static worksharing-loop init is never emitted, so this is
// unreachable.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12641 
// SIMD-only mode: 'distribute' static init is never emitted, so this is
// unreachable.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12647 
// SIMD-only mode: ordered-iteration bookkeeping is never emitted, so this is
// unreachable.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12654 
// SIMD-only mode: static worksharing-loop finish is never emitted, so this
// is unreachable.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12660 
12661 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12662                                               SourceLocation Loc,
12663                                               unsigned IVSize, bool IVSigned,
12664                                               Address IL, Address LB,
12665                                               Address UB, Address ST) {
12666   llvm_unreachable("Not supported in SIMD-only mode");
12667 }
12668 
12669 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12670                                                llvm::Value *NumThreads,
12671                                                SourceLocation Loc) {
12672   llvm_unreachable("Not supported in SIMD-only mode");
12673 }
12674 
12675 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12676                                              ProcBindKind ProcBind,
12677                                              SourceLocation Loc) {
12678   llvm_unreachable("Not supported in SIMD-only mode");
12679 }
12680 
12681 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12682                                                     const VarDecl *VD,
12683                                                     Address VDAddr,
12684                                                     SourceLocation Loc) {
12685   llvm_unreachable("Not supported in SIMD-only mode");
12686 }
12687 
12688 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12689     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12690     CodeGenFunction *CGF) {
12691   llvm_unreachable("Not supported in SIMD-only mode");
12692 }
12693 
12694 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12695     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12696   llvm_unreachable("Not supported in SIMD-only mode");
12697 }
12698 
12699 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12700                                     ArrayRef<const Expr *> Vars,
12701                                     SourceLocation Loc,
12702                                     llvm::AtomicOrdering AO) {
12703   llvm_unreachable("Not supported in SIMD-only mode");
12704 }
12705 
12706 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12707                                        const OMPExecutableDirective &D,
12708                                        llvm::Function *TaskFunction,
12709                                        QualType SharedsTy, Address Shareds,
12710                                        const Expr *IfCond,
12711                                        const OMPTaskDataTy &Data) {
12712   llvm_unreachable("Not supported in SIMD-only mode");
12713 }
12714 
12715 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12716     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12717     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12718     const Expr *IfCond, const OMPTaskDataTy &Data) {
12719   llvm_unreachable("Not supported in SIMD-only mode");
12720 }
12721 
// Emit a reduction in SIMD-only mode. Unlike the other overrides in this
// class, this one is actually supported — but only for the "simple" form,
// i.e. Options.SimpleReduction must be set by the caller; everything else is
// a caller bug (enforced by the assert). The real work is delegated to the
// base-class implementation, which handles the simple-reduction case.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12730 
12731 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12732     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12733     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12734   llvm_unreachable("Not supported in SIMD-only mode");
12735 }
12736 
12737 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12738                                                 SourceLocation Loc,
12739                                                 bool IsWorksharingReduction) {
12740   llvm_unreachable("Not supported in SIMD-only mode");
12741 }
12742 
12743 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12744                                                   SourceLocation Loc,
12745                                                   ReductionCodeGen &RCG,
12746                                                   unsigned N) {
12747   llvm_unreachable("Not supported in SIMD-only mode");
12748 }
12749 
12750 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12751                                                   SourceLocation Loc,
12752                                                   llvm::Value *ReductionsPtr,
12753                                                   LValue SharedLVal) {
12754   llvm_unreachable("Not supported in SIMD-only mode");
12755 }
12756 
12757 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12758                                            SourceLocation Loc) {
12759   llvm_unreachable("Not supported in SIMD-only mode");
12760 }
12761 
12762 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12763     CodeGenFunction &CGF, SourceLocation Loc,
12764     OpenMPDirectiveKind CancelRegion) {
12765   llvm_unreachable("Not supported in SIMD-only mode");
12766 }
12767 
12768 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12769                                          SourceLocation Loc, const Expr *IfCond,
12770                                          OpenMPDirectiveKind CancelRegion) {
12771   llvm_unreachable("Not supported in SIMD-only mode");
12772 }
12773 
12774 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12775     const OMPExecutableDirective &D, StringRef ParentName,
12776     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12777     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12778   llvm_unreachable("Not supported in SIMD-only mode");
12779 }
12780 
12781 void CGOpenMPSIMDRuntime::emitTargetCall(
12782     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12783     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12784     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12785     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12786                                      const OMPLoopDirective &D)>
12787         SizeEmitter) {
12788   llvm_unreachable("Not supported in SIMD-only mode");
12789 }
12790 
12791 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12792   llvm_unreachable("Not supported in SIMD-only mode");
12793 }
12794 
12795 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12796   llvm_unreachable("Not supported in SIMD-only mode");
12797 }
12798 
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  // No offload code is generated in SIMD-only mode, so there is never
  // anything target-specific to emit for a global here.
  // NOTE(review): returning false appears to mean "not handled by OpenMP
  // target codegen" (so regular codegen proceeds) — confirm against the
  // base-class contract.
  return false;
}
12802 
12803 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12804                                         const OMPExecutableDirective &D,
12805                                         SourceLocation Loc,
12806                                         llvm::Function *OutlinedFn,
12807                                         ArrayRef<llvm::Value *> CapturedVars) {
12808   llvm_unreachable("Not supported in SIMD-only mode");
12809 }
12810 
12811 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12812                                              const Expr *NumTeams,
12813                                              const Expr *ThreadLimit,
12814                                              SourceLocation Loc) {
12815   llvm_unreachable("Not supported in SIMD-only mode");
12816 }
12817 
12818 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12819     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12820     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
12821   llvm_unreachable("Not supported in SIMD-only mode");
12822 }
12823 
12824 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12825     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12826     const Expr *Device) {
12827   llvm_unreachable("Not supported in SIMD-only mode");
12828 }
12829 
12830 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12831                                            const OMPLoopDirective &D,
12832                                            ArrayRef<Expr *> NumIterations) {
12833   llvm_unreachable("Not supported in SIMD-only mode");
12834 }
12835 
12836 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12837                                               const OMPDependClause *C) {
12838   llvm_unreachable("Not supported in SIMD-only mode");
12839 }
12840 
12841 const VarDecl *
12842 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12843                                         const VarDecl *NativeParam) const {
12844   llvm_unreachable("Not supported in SIMD-only mode");
12845 }
12846 
12847 Address
12848 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12849                                          const VarDecl *NativeParam,
12850                                          const VarDecl *TargetParam) const {
12851   llvm_unreachable("Not supported in SIMD-only mode");
12852 }
12853