1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/APValue.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/StmtOpenMP.h"
23 #include "clang/AST/StmtVisitor.h"
24 #include "clang/Basic/BitmaskEnum.h"
25 #include "clang/Basic/FileManager.h"
26 #include "clang/Basic/OpenMPKinds.h"
27 #include "clang/Basic/SourceManager.h"
28 #include "clang/CodeGen/ConstantInitBuilder.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/SetOperations.h"
31 #include "llvm/ADT/StringExtras.h"
32 #include "llvm/Bitcode/BitcodeReader.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
/// Base class for handling code generation inside OpenMP regions.
///
/// Extends CodeGenFunction::CGCapturedStmtInfo (always constructed with the
/// CR_OpenMP kind) with the kind of OpenMP region, the code generation
/// callback for the region, the directive kind and a flag telling whether the
/// region has a 'cancel'.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Create region info bound to the captured statement \p CS.
  /// \param CS Captured statement forwarded to the base class.
  /// \param RegionKind Kind of the OpenMP region.
  /// \param CodeGen Code generation callback stored for the region.
  /// \param Kind OpenMP directive kind of the region.
  /// \param HasCancel Value reported by hasCancel().
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Create region info that is not bound to a captured statement; the
  /// remaining parameters have the same meaning as in the constructor above.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Hook for emitting a switching point of an untied task. The default
  /// implementation does nothing.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Get the kind of this OpenMP region.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Get the OpenMP directive kind this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Return the HasCancel flag passed at construction.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI support: any captured statement info of kind CR_OpenMP
  /// is treated as a CGOpenMPRegionInfo.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of the region; used by the classof() of the derived classes.
  CGOpenMPRegionKind RegionKind;
  /// Code generation callback for the region.
  RegionCodeGenTy CodeGen;
  /// Directive kind the region was created for.
  OpenMPDirectiveKind Kind;
  /// Flag returned by hasCancel().
  bool HasCancel;
};
109 
110 /// API for captured statement code generation in OpenMP constructs.
111 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
112 public:
113   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
114                              const RegionCodeGenTy &CodeGen,
115                              OpenMPDirectiveKind Kind, bool HasCancel,
116                              StringRef HelperName)
117       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
118                            HasCancel),
119         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
120     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
121   }
122 
123   /// Get a variable or parameter for storing global thread id
124   /// inside OpenMP construct.
125   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
126 
127   /// Get the name of the capture helper.
128   StringRef getHelperName() const override { return HelperName; }
129 
130   static bool classof(const CGCapturedStmtInfo *Info) {
131     return CGOpenMPRegionInfo::classof(Info) &&
132            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
133                ParallelOutlinedRegion;
134   }
135 
136 private:
137   /// A variable or parameter storing global thread id for OpenMP
138   /// constructs.
139   const VarDecl *ThreadIDVar;
140   StringRef HelperName;
141 };
142 
/// API for captured statement code generation in OpenMP constructs.
///
/// Region info for regions of kind TaskOutlinedRegion. It carries a
/// reference to an UntiedTaskActionTy that emits the switching points of
/// untied tasks.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre-action that, for untied tasks, emits a switch over the value loaded
  /// through the part id variable and registers one case per task part.
  /// For tied tasks (Untied == false) every member function is a no-op,
  /// except getNumberOfParts(), which requires the switch to exist.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True if the task is untied.
    bool Untied;
    /// Pointer-typed variable through which the part id is loaded and stored.
    const VarDecl *PartIDVar;
    /// Code generation callback invoked at every switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch created in Enter(); stays null until Enter() runs for an untied
    /// task.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    /// \param Tied True if the task is tied; the action is active only when
    /// this is false.
    /// \param PartIDVar Variable holding the pointer to the part id.
    /// \param UntiedCodeGen Callback invoked at each switching point.
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    /// For untied tasks, emit the switch on the current part id, route its
    /// default destination to the return block, register the block that
    /// follows as case 0 and emit the first switching point.
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // The default destination of the switch branches (through cleanups)
        // to the return block.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Case 0 jumps to the freshly emitted block.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// For untied tasks, emit one switching point: store the index of the
    /// next part (the current number of switch cases) through the part id
    /// pointer, run UntiedCodeGen, branch to the return block, and add a new
    /// switch case whose block branches to the continuation point
    /// ".untied.next.".
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        // Continuation point reached through the new switch case added below.
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of cases registered in the switch. Dereferences UntiedSwitch
    /// unconditionally, so it must only be called after Enter() has created
    /// the switch (i.e. for untied tasks).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  /// \param CS Captured statement of the region.
  /// \param ThreadIDVar Variable holding the global thread id; must not be
  /// null.
  /// \param CodeGen Code generation callback for the region.
  /// \param Kind Directive kind of the region.
  /// \param HasCancel Whether the region has a 'cancel'.
  /// \param Action Untied task action; stored by reference, so it has to
  /// outlive this object.
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Emit a switching point by delegating to the untied task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// LLVM-style RTTI support: matches OpenMP region infos whose region kind
  /// is TaskOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
231 
232 /// API for inlined captured statement code generation in OpenMP
233 /// constructs.
234 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
235 public:
236   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
237                             const RegionCodeGenTy &CodeGen,
238                             OpenMPDirectiveKind Kind, bool HasCancel)
239       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
240         OldCSI(OldCSI),
241         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
242 
243   // Retrieve the value of the context parameter.
244   llvm::Value *getContextValue() const override {
245     if (OuterRegionInfo)
246       return OuterRegionInfo->getContextValue();
247     llvm_unreachable("No context value for inlined OpenMP region");
248   }
249 
250   void setContextValue(llvm::Value *V) override {
251     if (OuterRegionInfo) {
252       OuterRegionInfo->setContextValue(V);
253       return;
254     }
255     llvm_unreachable("No context value for inlined OpenMP region");
256   }
257 
258   /// Lookup the captured field decl for a variable.
259   const FieldDecl *lookup(const VarDecl *VD) const override {
260     if (OuterRegionInfo)
261       return OuterRegionInfo->lookup(VD);
262     // If there is no outer outlined region,no need to lookup in a list of
263     // captured variables, we can use the original one.
264     return nullptr;
265   }
266 
267   FieldDecl *getThisFieldDecl() const override {
268     if (OuterRegionInfo)
269       return OuterRegionInfo->getThisFieldDecl();
270     return nullptr;
271   }
272 
273   /// Get a variable or parameter for storing global thread id
274   /// inside OpenMP construct.
275   const VarDecl *getThreadIDVariable() const override {
276     if (OuterRegionInfo)
277       return OuterRegionInfo->getThreadIDVariable();
278     return nullptr;
279   }
280 
281   /// Get an LValue for the current ThreadID variable.
282   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
285     llvm_unreachable("No LValue for inlined OpenMP construct");
286   }
287 
288   /// Get the name of the capture helper.
289   StringRef getHelperName() const override {
290     if (auto *OuterRegionInfo = getOldCSI())
291       return OuterRegionInfo->getHelperName();
292     llvm_unreachable("No helper name for inlined OpenMP construct");
293   }
294 
295   void emitUntiedSwitch(CodeGenFunction &CGF) override {
296     if (OuterRegionInfo)
297       OuterRegionInfo->emitUntiedSwitch(CGF);
298   }
299 
300   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
301 
302   static bool classof(const CGCapturedStmtInfo *Info) {
303     return CGOpenMPRegionInfo::classof(Info) &&
304            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
305   }
306 
307   ~CGOpenMPInlinedRegionInfo() override = default;
308 
309 private:
310   /// CodeGen info about outer OpenMP region.
311   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
312   CGOpenMPRegionInfo *OuterRegionInfo;
313 };
314 
315 /// API for captured statement code generation in OpenMP target
316 /// constructs. For this captures, implicit parameters are used instead of the
317 /// captured fields. The name of the target region has to be unique in a given
318 /// application so it is provided by the client, because only the client has
319 /// the information to generate that.
320 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
321 public:
322   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
323                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
324       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
325                            /*HasCancel=*/false),
326         HelperName(HelperName) {}
327 
328   /// This is unused for target regions because each starts executing
329   /// with a single thread.
330   const VarDecl *getThreadIDVariable() const override { return nullptr; }
331 
332   /// Get the name of the capture helper.
333   StringRef getHelperName() const override { return HelperName; }
334 
335   static bool classof(const CGCapturedStmtInfo *Info) {
336     return CGOpenMPRegionInfo::classof(Info) &&
337            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
338   }
339 
340 private:
341   StringRef HelperName;
342 };
343 
344 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
345   llvm_unreachable("No codegen for expressions");
346 }
347 /// API for generation of expressions captured in a innermost OpenMP
348 /// region.
349 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
350 public:
351   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
352       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
353                                   OMPD_unknown,
354                                   /*HasCancel=*/false),
355         PrivScope(CGF) {
356     // Make sure the globals captured in the provided statement are local by
357     // using the privatization logic. We assume the same variable is not
358     // captured more than once.
359     for (const auto &C : CS.captures()) {
360       if (!C.capturesVariable() && !C.capturesVariableByCopy())
361         continue;
362 
363       const VarDecl *VD = C.getCapturedVar();
364       if (VD->isLocalVarDeclOrParm())
365         continue;
366 
367       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
368                       /*RefersToEnclosingVariableOrCapture=*/false,
369                       VD->getType().getNonReferenceType(), VK_LValue,
370                       C.getLocation());
371       PrivScope.addPrivate(
372           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
373     }
374     (void)PrivScope.Privatize();
375   }
376 
377   /// Lookup the captured field decl for a variable.
378   const FieldDecl *lookup(const VarDecl *VD) const override {
379     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
380       return FD;
381     return nullptr;
382   }
383 
384   /// Emit the captured statement body.
385   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
386     llvm_unreachable("No body for expressions");
387   }
388 
389   /// Get a variable or parameter for storing global thread id
390   /// inside OpenMP construct.
391   const VarDecl *getThreadIDVariable() const override {
392     llvm_unreachable("No thread id for expressions");
393   }
394 
395   /// Get the name of the capture helper.
396   StringRef getHelperName() const override {
397     llvm_unreachable("No helper name for expressions");
398   }
399 
400   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
401 
402 private:
403   /// Private scope to capture global variables.
404   CodeGenFunction::OMPPrivateScope PrivScope;
405 };
406 
407 /// RAII for emitting code of OpenMP constructs.
408 class InlinedOpenMPRegionRAII {
409   CodeGenFunction &CGF;
410   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
411   FieldDecl *LambdaThisCaptureField = nullptr;
412   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
413   bool NoInheritance = false;
414 
415 public:
416   /// Constructs region for combined constructs.
417   /// \param CodeGen Code generation sequence for combined directives. Includes
418   /// a list of functions used for code generation of implicitly inlined
419   /// regions.
420   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
421                           OpenMPDirectiveKind Kind, bool HasCancel,
422                           bool NoInheritance = true)
423       : CGF(CGF), NoInheritance(NoInheritance) {
424     // Start emission for the construct.
425     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
426         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
427     if (NoInheritance) {
428       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
429       LambdaThisCaptureField = CGF.LambdaThisCaptureField;
430       CGF.LambdaThisCaptureField = nullptr;
431       BlockInfo = CGF.BlockInfo;
432       CGF.BlockInfo = nullptr;
433     }
434   }
435 
436   ~InlinedOpenMPRegionRAII() {
437     // Restore original CapturedStmtInfo only if we're done with code emission.
438     auto *OldCSI =
439         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
440     delete CGF.CapturedStmtInfo;
441     CGF.CapturedStmtInfo = OldCSI;
442     if (NoInheritance) {
443       std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
444       CGF.LambdaThisCaptureField = LambdaThisCaptureField;
445       CGF.BlockInfo = BlockInfo;
446     }
447   }
448 };
449 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
///
/// The enumeration is marked as a bitmask enum whose largest value is
/// OMP_IDENT_WORK_DISTRIBUTE.
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
478 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
/// The enumeration is marked as a bitmask enum whose largest value is
/// OMP_REQ_DYNAMIC_ALLOCATORS.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// Flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// No requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device IDs used for offloading.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
504 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
///
/// The enumerators below name the fields of that structure in declaration
/// order, starting at index 0.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
545 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
///
/// Each 'ordered' enumerator (OMP_ord_*) has the value of the corresponding
/// unordered enumerator (OMP_sch_*) plus 32. The two modifier enumerators at
/// the end are single-bit flags (bits 29 and 30).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule; alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
577 
578 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
579 /// region.
580 class CleanupTy final : public EHScopeStack::Cleanup {
581   PrePostActionTy *Action;
582 
583 public:
584   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
585   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
586     if (!CGF.HaveInsertPoint())
587       return;
588     Action->Exit(CGF);
589   }
590 };
591 
592 } // anonymous namespace
593 
594 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
595   CodeGenFunction::RunCleanupsScope Scope(CGF);
596   if (PrePostAction) {
597     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
598     Callback(CodeGen, CGF, *PrePostAction);
599   } else {
600     PrePostActionTy Action;
601     Callback(CodeGen, CGF, Action);
602   }
603 }
604 
605 /// Check if the combiner is a call to UDR combiner and if it is so return the
606 /// UDR decl used for reduction.
607 static const OMPDeclareReductionDecl *
608 getReductionInit(const Expr *ReductionOp) {
609   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
610     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
611       if (const auto *DRE =
612               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
613         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
614           return DRD;
615   return nullptr;
616 }
617 
618 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
619                                              const OMPDeclareReductionDecl *DRD,
620                                              const Expr *InitOp,
621                                              Address Private, Address Original,
622                                              QualType Ty) {
623   if (DRD->getInitializer()) {
624     std::pair<llvm::Function *, llvm::Function *> Reduction =
625         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
626     const auto *CE = cast<CallExpr>(InitOp);
627     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
628     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
629     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
630     const auto *LHSDRE =
631         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
632     const auto *RHSDRE =
633         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
634     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
635     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
636                             [=]() { return Private; });
637     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
638                             [=]() { return Original; });
639     (void)PrivateScope.Privatize();
640     RValue Func = RValue::get(Reduction.second);
641     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
642     CGF.EmitIgnoredExpr(InitOp);
643   } else {
644     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
645     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
646     auto *GV = new llvm::GlobalVariable(
647         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
648         llvm::GlobalValue::PrivateLinkage, Init, Name);
649     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
650     RValue InitRVal;
651     switch (CGF.getEvaluationKind(Ty)) {
652     case TEK_Scalar:
653       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
654       break;
655     case TEK_Complex:
656       InitRVal =
657           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
658       break;
659     case TEK_Aggregate: {
660       OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
661       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
662       CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
663                            /*IsInitializer=*/false);
664       return;
665     }
666     }
667     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
668     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
669     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
670                          /*IsInitializer=*/false);
671   }
672 }
673 
/// Emit element-by-element initialization of an array (used for arrays of
/// complex types).
///
/// The generated code is a loop over the destination elements that is
/// skipped entirely when the array is empty.
/// \param CGF Code generation function used for emission.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, every element is initialized via
/// emitInitWithReductionInitializer; otherwise \p Init is emitted directly
/// into every element.
/// \param Init Initial expression of array.
/// \param DRD User-defined reduction declaration; when non-null, \p SrcAddr is
/// walked in lockstep with the destination array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  // The source array is only touched when a declare reduction is involved.
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop completely for an empty array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes track the current source (if any) and destination elements;
  // their first incoming values come from the block preceding the loop.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Cleanups created while initializing one element are run before the
    // loop advances.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the value name below says "dest" although this is the
    // source element; it is left unchanged because it is the emitted IR
    // value name.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Back edge: the next destination element flows in from the block that
  // ends the loop body.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
765 
766 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
767   return CGF.EmitOMPSharedLValue(E);
768 }
769 
770 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
771                                             const Expr *E) {
772   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
773     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
774   return LValue();
775 }
776 
777 void ReductionCodeGen::emitAggregateInitialization(
778     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
779     const OMPDeclareReductionDecl *DRD) {
780   // Emit VarDecl with copy init for arrays.
781   // Get the address of the original variable captured in current
782   // captured region.
783   const auto *PrivateVD =
784       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
785   bool EmitDeclareReductionInit =
786       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
787   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
788                        EmitDeclareReductionInit,
789                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
790                                                 : PrivateVD->getInit(),
791                        DRD, SharedLVal.getAddress(CGF));
792 }
793 
794 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
795                                    ArrayRef<const Expr *> Origs,
796                                    ArrayRef<const Expr *> Privates,
797                                    ArrayRef<const Expr *> ReductionOps) {
798   ClausesData.reserve(Shareds.size());
799   SharedAddresses.reserve(Shareds.size());
800   Sizes.reserve(Shareds.size());
801   BaseDecls.reserve(Shareds.size());
802   const auto *IOrig = Origs.begin();
803   const auto *IPriv = Privates.begin();
804   const auto *IRed = ReductionOps.begin();
805   for (const Expr *Ref : Shareds) {
806     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
807     std::advance(IOrig, 1);
808     std::advance(IPriv, 1);
809     std::advance(IRed, 1);
810   }
811 }
812 
813 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
814   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
815          "Number of generated lvalues must be exactly N.");
816   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
817   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
818   SharedAddresses.emplace_back(First, Second);
819   if (ClausesData[N].Shared == ClausesData[N].Ref) {
820     OrigAddresses.emplace_back(First, Second);
821   } else {
822     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
823     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
824     OrigAddresses.emplace_back(First, Second);
825   }
826 }
827 
/// Compute and record the size of the N-th reduction item and, when its
/// private type is variably modified, emit that type with the computed element
/// count bound to the VLA size expression.
///
/// Appends one (size in chars, element count) pair to Sizes. The element count
/// is nullptr when the private type is not variably modified.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  // Non-variably-modified items only need the size of the original type; no
  // element count is recorded for them.
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  // Element type is taken from the pointee type of the original item's
  // lower-bound pointer.
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count = (upper bound - lower bound) + 1, and the
    // size in chars is that count times the element size.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variable: start from the size of the type and divide by the
    // element size to obtain the element count.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the opaque size expression of the private VLA type to the computed
  // element count while the type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
864 
865 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
866                                          llvm::Value *Size) {
867   const auto *PrivateVD =
868       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
869   QualType PrivateType = PrivateVD->getType();
870   if (!PrivateType->isVariablyModifiedType()) {
871     assert(!Size && !Sizes[N].second &&
872            "Size should be nullptr for non-variably modified reduction "
873            "items.");
874     return;
875   }
876   CodeGenFunction::OpaqueValueMapping OpaqueMap(
877       CGF,
878       cast<OpaqueValueExpr>(
879           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
880       RValue::get(Size));
881   CGF.EmitVariablyModifiedType(PrivateType);
882 }
883 
/// Emit initialization of the private copy of the N-th reduction item.
///
/// \param CGF         Function being generated.
/// \param N           Index of the reduction item; its shared lvalue must
///                    already have been generated.
/// \param PrivateAddr Address of the private copy; re-cast to the memory type
///                    of the private variable before use.
/// \param SharedLVal  LValue of the shared item; rebuilt with the type of the
///                    recorded shared address before use.
/// \param DefaultInit Callback performing default initialization. Its result
///                    is only consulted on the last path below: the private
///                    variable's own initializer is emitted only if the
///                    callback returns false.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array item: run the default init first only when the declare reduction
    // supplies an initializer, then initialize element by element.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar item handled by the declare reduction: default init followed by
    // the reduction initializer.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Default init reported false: fall back to the private variable's own
    // non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
917 
918 bool ReductionCodeGen::needCleanups(unsigned N) {
919   const auto *PrivateVD =
920       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
921   QualType PrivateType = PrivateVD->getType();
922   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
923   return DTorKind != QualType::DK_none;
924 }
925 
926 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
927                                     Address PrivateAddr) {
928   const auto *PrivateVD =
929       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
930   QualType PrivateType = PrivateVD->getType();
931   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
932   if (needCleanups(N)) {
933     PrivateAddr = CGF.Builder.CreateElementBitCast(
934         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
935     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
936   }
937 }
938 
/// Load through the pointer/reference levels of \p BaseTy, starting at
/// \p BaseLV, until the remaining type is neither a pointer nor a reference or
/// is the same type as \p ElTy.
///
/// \returns an lvalue at the reached address, with the address element type
/// cast to the memory type of \p ElTy and the type, base info and TBAA info
/// taken from the last loaded lvalue.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Peel one level of indirection: pointers are loaded as pointer lvalues,
    // anything else at this point is loaded as a reference.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
958 
/// Wrap \p Addr in as many levels of indirection as \p BaseTy has
/// pointer/reference levels above \p ElTy.
///
/// For each such level a memory temporary is created; every temporary except
/// the innermost stores the pointer to the next inner temporary, and the
/// innermost stores \p Addr (cast to its element type).
///
/// \returns the outermost temporary when at least one level was created;
/// otherwise \p Addr cast to \p BaseLVType with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Tmp: most recently created (innermost so far) temporary.
  // TopTmp: temporary created in the previous iteration.
  // MostTopTmp: first (outermost) temporary, the one returned to the caller.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    // Chain the previous temporary to the new one, or remember the first.
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast Addr either to what the innermost temporary holds or, when no
  // temporary was needed, to the type of the original base lvalue address.
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
986 
987 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
988   const VarDecl *OrigVD = nullptr;
989   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
990     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
991     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
992       Base = TempOASE->getBase()->IgnoreParenImpCasts();
993     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
994       Base = TempASE->getBase()->IgnoreParenImpCasts();
995     DE = cast<DeclRefExpr>(Base);
996     OrigVD = cast<VarDecl>(DE->getDecl());
997   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
998     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
999     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1000       Base = TempASE->getBase()->IgnoreParenImpCasts();
1001     DE = cast<DeclRefExpr>(Base);
1002     OrigVD = cast<VarDecl>(DE->getDecl());
1003   }
1004   return OrigVD;
1005 }
1006 
/// Adjust the address of the private copy of the N-th reduction item so that
/// it can stand in for the base variable of the original expression, and
/// record that base variable in BaseDecls.
///
/// When the item is an array section or array subscript (getBaseDecl finds a
/// base variable), the pointer difference between the loaded base of the
/// original variable and the start of the shared item is applied to the
/// private pointer, and the result is wrapped by castToBase. Otherwise
/// \p PrivateAddr is returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Adjustment = base pointer - shared item pointer.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    // Offset the private pointer by the same distance.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: the item itself is the base declaration.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1033 
1034 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1035   const OMPDeclareReductionDecl *DRD =
1036       getReductionInit(ClausesData[N].ReductionOp);
1037   return DRD && DRD->getInitializer();
1038 }
1039 
1040 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1041   return CGF.EmitLoadOfPointerLValue(
1042       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1043       getThreadIDVariable()->getType()->castAs<PointerType>());
1044 }
1045 
1046 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1047   if (!CGF.HaveInsertPoint())
1048     return;
1049   // 1.2.2 OpenMP Language Terminology
1050   // Structured block - An executable statement with a single entry at the
1051   // top and a single exit at the bottom.
1052   // The point of exit cannot be a branch out of the structured block.
1053   // longjmp() and throw() must not violate the entry/exit criteria.
1054   CGF.EHStack.pushTerminate();
1055   if (S)
1056     CGF.incrementProfileCounter(S);
1057   CodeGen(CGF);
1058   CGF.EHStack.popTerminate();
1059 }
1060 
1061 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1062     CodeGenFunction &CGF) {
1063   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1064                             getThreadIDVariable()->getType(),
1065                             AlignmentSource::Decl);
1066 }
1067 
1068 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1069                                        QualType FieldTy) {
1070   auto *Field = FieldDecl::Create(
1071       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1072       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1073       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1074   Field->setAccess(AS_public);
1075   DC->addDecl(Field);
1076   return Field;
1077 }
1078 
1079 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1080                                  StringRef Separator)
1081     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1082       OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
1083   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1084 
1085   // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1086   OMPBuilder.initialize();
1087   loadOffloadInfoMetadata();
1088 }
1089 
1090 void CGOpenMPRuntime::clear() {
1091   InternalVars.clear();
1092   // Clean non-target variable declarations possibly used only in debug info.
1093   for (const auto &Data : EmittedNonTargetVariables) {
1094     if (!Data.getValue().pointsToAliveValue())
1095       continue;
1096     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1097     if (!GV)
1098       continue;
1099     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1100       continue;
1101     GV->eraseFromParent();
1102   }
1103 }
1104 
1105 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1106   SmallString<128> Buffer;
1107   llvm::raw_svector_ostream OS(Buffer);
1108   StringRef Sep = FirstSeparator;
1109   for (StringRef Part : Parts) {
1110     OS << Sep << Part;
1111     Sep = Separator;
1112   }
1113   return std::string(OS.str());
1114 }
1115 
/// Emit an internal helper function for a user-defined reduction: either its
/// combiner or its initializer.
///
/// \param CGM                 Module to emit the function into.
/// \param Ty                  Reduction type; both parameters are
///                            restrict-qualified pointers to it.
/// \param CombinerInitializer Expression to evaluate in the function body;
///                            may be null, in which case no expression is
///                            emitted for it.
/// \param In                  Variable mapped to the pointee of the second
///                            parameter.
/// \param Out                 Variable mapped to the pointee of the first
///                            parameter.
/// \param IsCombiner          Selects the "omp_combiner" name; otherwise
///                            "omp_initializer" is used and a non-trivial
///                            initializer of \p Out is emitted first.
/// \returns the created function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  // (the 'out' parameter is pushed first, the 'in' parameter second)
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // With optimization enabled, replace noinline/optnone with alwaysinline.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // Initializer functions first run the non-trivial initializer of the Out
  // variable itself, if it has one.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  // The expression is evaluated for its side effects only.
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1172 
1173 void CGOpenMPRuntime::emitUserDefinedReduction(
1174     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1175   if (UDRMap.count(D) > 0)
1176     return;
1177   llvm::Function *Combiner = emitCombinerOrInitializer(
1178       CGM, D->getType(), D->getCombiner(),
1179       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1180       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1181       /*IsCombiner=*/true);
1182   llvm::Function *Initializer = nullptr;
1183   if (const Expr *Init = D->getInitializer()) {
1184     Initializer = emitCombinerOrInitializer(
1185         CGM, D->getType(),
1186         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1187                                                                      : nullptr,
1188         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1189         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1190         /*IsCombiner=*/false);
1191   }
1192   UDRMap.try_emplace(D, Combiner, Initializer);
1193   if (CGF) {
1194     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1195     Decls.second.push_back(D);
1196   }
1197 }
1198 
1199 std::pair<llvm::Function *, llvm::Function *>
1200 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1201   auto I = UDRMap.find(D);
1202   if (I != UDRMap.end())
1203     return I->second;
1204   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1205   return UDRMap.lookup(D);
1206 }
1207 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
//
// The constructor pushes a finalization callback for the given directive kind
// onto the builder; the destructor pops it again. Both are no-ops when the
// builder pointer is null.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    //
    // The callback captures CGF by reference, so CGF has to stay alive for as
    // long as the callback may be invoked.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Temporarily move the builder to IP; the guard restores the previous
      // insertion point on scope exit.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Builder the callback was pushed to; null disables both push and pop.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1252 
1253 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1254     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1255     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1256     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1257   assert(ThreadIDVar->getType()->isPointerType() &&
1258          "thread id variable must be of type kmp_int32 *");
1259   CodeGenFunction CGF(CGM, true);
1260   bool HasCancel = false;
1261   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1262     HasCancel = OPD->hasCancel();
1263   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1264     HasCancel = OPD->hasCancel();
1265   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1266     HasCancel = OPSD->hasCancel();
1267   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1268     HasCancel = OPFD->hasCancel();
1269   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1270     HasCancel = OPFD->hasCancel();
1271   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1272     HasCancel = OPFD->hasCancel();
1273   else if (const auto *OPFD =
1274                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1275     HasCancel = OPFD->hasCancel();
1276   else if (const auto *OPFD =
1277                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1278     HasCancel = OPFD->hasCancel();
1279 
1280   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1281   //       parallel region to make cancellation barriers work properly.
1282   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1283   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1284   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1285                                     HasCancel, OutlinedHelperName);
1286   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1287   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1288 }
1289 
1290 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1291     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1292     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1293   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1294   return emitParallelOrTeamsOutlinedFunction(
1295       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1296 }
1297 
1298 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1299     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1300     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1301   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1302   return emitParallelOrTeamsOutlinedFunction(
1303       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1304 }
1305 
/// Generate the outlined function for the captured statement of a task or
/// taskloop directive \p D.
///
/// \param ThreadIDVar   Thread-id variable; must NOT have pointer type.
/// \param PartIDVar     Part-id variable handed to the untied-task action.
/// \param TaskTVar      Variable holding a pointer that is loaded and passed
///                      as the last argument of the __kmpc_omp_task call
///                      emitted by the untied-task action.
/// \param InnermostKind Directive kind passed to the region info.
/// \param CodeGen       Callback generating the region body; the untied-task
///                      action is installed on it.
/// \param Tied          Whether the task is tied.
/// \param NumberOfParts Written only when \p Tied is false; receives the
///                      number of parts reported by the action.
/// \returns the generated outlined function.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Emits a call to __kmpc_omp_task(loc, thread id, task pointer).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop directives capture under OMPD_taskloop, all others under
  // OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only the directive kinds checked here can report a cancel region.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only reported for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1352 
1353 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1354                              const RecordDecl *RD, const CGRecordLayout &RL,
1355                              ArrayRef<llvm::Constant *> Data) {
1356   llvm::StructType *StructTy = RL.getLLVMType();
1357   unsigned PrevIdx = 0;
1358   ConstantInitBuilder CIBuilder(CGM);
1359   auto DI = Data.begin();
1360   for (const FieldDecl *FD : RD->fields()) {
1361     unsigned Idx = RL.getLLVMFieldNo(FD);
1362     // Fill the alignment.
1363     for (unsigned I = PrevIdx; I < Idx; ++I)
1364       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1365     PrevIdx = Idx + 1;
1366     Fields.add(*DI);
1367     ++DI;
1368   }
1369 }
1370 
1371 template <class... As>
1372 static llvm::GlobalVariable *
1373 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1374                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1375                    As &&... Args) {
1376   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1377   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1378   ConstantInitBuilder CIBuilder(CGM);
1379   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1380   buildStructValue(Fields, CGM, RD, RL, Data);
1381   return Fields.finishAndCreateGlobal(
1382       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1383       std::forward<As>(Args)...);
1384 }
1385 
1386 template <typename T>
1387 static void
1388 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1389                                          ArrayRef<llvm::Constant *> Data,
1390                                          T &Parent) {
1391   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1392   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1393   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1394   buildStructValue(Fields, CGM, RD, RL, Data);
1395   Fields.finishAndAddTo(Parent);
1396 }
1397 
1398 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1399                                              bool AtCurrentPoint) {
1400   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1401   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1402 
1403   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1404   if (AtCurrentPoint) {
1405     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1406         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1407   } else {
1408     Elem.second.ServiceInsertPt =
1409         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1410     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1411   }
1412 }
1413 
1414 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1415   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1416   if (Elem.second.ServiceInsertPt) {
1417     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1418     Elem.second.ServiceInsertPt = nullptr;
1419     Ptr->eraseFromParent();
1420   }
1421 }
1422 
1423 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1424                                                   SourceLocation Loc,
1425                                                   SmallString<128> &Buffer) {
1426   llvm::raw_svector_ostream OS(Buffer);
1427   // Build debug location
1428   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1429   OS << ";" << PLoc.getFilename() << ";";
1430   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1431     OS << FD->getQualifiedNameAsString();
1432   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1433   return OS.str();
1434 }
1435 
1436 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1437                                                  SourceLocation Loc,
1438                                                  unsigned Flags) {
1439   llvm::Constant *SrcLocStr;
1440   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1441       Loc.isInvalid()) {
1442     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1443   } else {
1444     std::string FunctionName = "";
1445     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1446       FunctionName = FD->getQualifiedNameAsString();
1447     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1448     const char *FileName = PLoc.getFilename();
1449     unsigned Line = PLoc.getLine();
1450     unsigned Column = PLoc.getColumn();
1451     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1452                                                 Line, Column);
1453   }
1454   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1455   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1456                                      Reserved2Flags);
1457 }
1458 
/// Returns the value holding the OpenMP global thread id for use at the
/// current point of \p CGF's function.
///
/// The value comes from one of four places, tried in this order:
///  1. OMPBuilder, when the OpenMPIRBuilder language option is set;
///  2. the thread id already cached for CGF.CurFn in OpenMPLocThreadIDMap;
///  3. a load of the region's thread id variable, when the current captured
///     statement info is a CGOpenMPRegionInfo that has one and the load is
///     allowed at this point (see the condition below);
///  4. a call to __kmpc_global_thread_num emitted at the function's service
///     insert point, which is then cached for the function.
///
/// \param CGF Function being emitted; CGF.CurFn must be set.
/// \param Loc Source location used for the emitted load or runtime call.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the thread id variable if any of the following holds: no
      // landing pad is required, C++ exceptions are off, we are emitting into
      // the block that holds the alloca insert point, or the variable's
      // pointer is not an instruction or is one defined in that block or in
      // the current insert block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insert point; the guard puts the builder's
  // insert point back when this function returns.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1526 
1527 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1528   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1529   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1530     clearLocThreadIdInsertPt(CGF);
1531     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1532   }
1533   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1534     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1535       UDRMap.erase(D);
1536     FunctionUDRMap.erase(CGF.CurFn);
1537   }
1538   auto I = FunctionUDMMap.find(CGF.CurFn);
1539   if (I != FunctionUDMMap.end()) {
1540     for(const auto *D : I->second)
1541       UDMMap.erase(D);
1542     FunctionUDMMap.erase(I);
1543   }
1544   LastprivateConditionalToTypes.erase(CGF.CurFn);
1545   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1546 }
1547 
1548 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1549   return OMPBuilder.IdentPtr;
1550 }
1551 
1552 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1553   if (!Kmpc_MicroTy) {
1554     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1555     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1556                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1557     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1558   }
1559   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1560 }
1561 
1562 llvm::FunctionCallee
1563 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1564   assert((IVSize == 32 || IVSize == 64) &&
1565          "IV size is not compatible with the omp runtime");
1566   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1567                                             : "__kmpc_for_static_init_4u")
1568                                 : (IVSigned ? "__kmpc_for_static_init_8"
1569                                             : "__kmpc_for_static_init_8u");
1570   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1571   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1572   llvm::Type *TypeParams[] = {
1573     getIdentTyPointerTy(),                     // loc
1574     CGM.Int32Ty,                               // tid
1575     CGM.Int32Ty,                               // schedtype
1576     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1577     PtrTy,                                     // p_lower
1578     PtrTy,                                     // p_upper
1579     PtrTy,                                     // p_stride
1580     ITy,                                       // incr
1581     ITy                                        // chunk
1582   };
1583   auto *FnTy =
1584       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1585   return CGM.CreateRuntimeFunction(FnTy, Name);
1586 }
1587 
1588 llvm::FunctionCallee
1589 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1590   assert((IVSize == 32 || IVSize == 64) &&
1591          "IV size is not compatible with the omp runtime");
1592   StringRef Name =
1593       IVSize == 32
1594           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1595           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1596   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1597   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1598                                CGM.Int32Ty,           // tid
1599                                CGM.Int32Ty,           // schedtype
1600                                ITy,                   // lower
1601                                ITy,                   // upper
1602                                ITy,                   // stride
1603                                ITy                    // chunk
1604   };
1605   auto *FnTy =
1606       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1607   return CGM.CreateRuntimeFunction(FnTy, Name);
1608 }
1609 
1610 llvm::FunctionCallee
1611 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1612   assert((IVSize == 32 || IVSize == 64) &&
1613          "IV size is not compatible with the omp runtime");
1614   StringRef Name =
1615       IVSize == 32
1616           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1617           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1618   llvm::Type *TypeParams[] = {
1619       getIdentTyPointerTy(), // loc
1620       CGM.Int32Ty,           // tid
1621   };
1622   auto *FnTy =
1623       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1624   return CGM.CreateRuntimeFunction(FnTy, Name);
1625 }
1626 
1627 llvm::FunctionCallee
1628 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1629   assert((IVSize == 32 || IVSize == 64) &&
1630          "IV size is not compatible with the omp runtime");
1631   StringRef Name =
1632       IVSize == 32
1633           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1634           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1635   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1636   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1637   llvm::Type *TypeParams[] = {
1638     getIdentTyPointerTy(),                     // loc
1639     CGM.Int32Ty,                               // tid
1640     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1641     PtrTy,                                     // p_lower
1642     PtrTy,                                     // p_upper
1643     PtrTy                                      // p_stride
1644   };
1645   auto *FnTy =
1646       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1647   return CGM.CreateRuntimeFunction(FnTy, Name);
1648 }
1649 
1650 /// Obtain information that uniquely identifies a target entry. This
1651 /// consists of the file and device IDs as well as line number associated with
1652 /// the relevant entry source location.
1653 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1654                                      unsigned &DeviceID, unsigned &FileID,
1655                                      unsigned &LineNum) {
1656   SourceManager &SM = C.getSourceManager();
1657 
1658   // The loc should be always valid and have a file ID (the user cannot use
1659   // #pragma directives in macros)
1660 
1661   assert(Loc.isValid() && "Source location is expected to be always valid.");
1662 
1663   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1664   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1665 
1666   llvm::sys::fs::UniqueID ID;
1667   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1668     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1669     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1670     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1671       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1672           << PLoc.getFilename() << EC.message();
1673   }
1674 
1675   DeviceID = ID.getDevice();
1676   FileID = ID.getFile();
1677   LineNum = PLoc.getLine();
1678 }
1679 
1680 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1681   if (CGM.getLangOpts().OpenMPSimd)
1682     return Address::invalid();
1683   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1684       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1685   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1686               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1687                HasRequiresUnifiedSharedMemory))) {
1688     SmallString<64> PtrName;
1689     {
1690       llvm::raw_svector_ostream OS(PtrName);
1691       OS << CGM.getMangledName(GlobalDecl(VD));
1692       if (!VD->isExternallyVisible()) {
1693         unsigned DeviceID, FileID, Line;
1694         getTargetEntryUniqueInfo(CGM.getContext(),
1695                                  VD->getCanonicalDecl()->getBeginLoc(),
1696                                  DeviceID, FileID, Line);
1697         OS << llvm::format("_%x", FileID);
1698       }
1699       OS << "_decl_tgt_ref_ptr";
1700     }
1701     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1702     if (!Ptr) {
1703       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1704       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1705                                         PtrName);
1706 
1707       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1708       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1709 
1710       if (!CGM.getLangOpts().OpenMPIsDevice)
1711         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1712       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1713     }
1714     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1715   }
1716   return Address::invalid();
1717 }
1718 
1719 llvm::Constant *
1720 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1721   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1722          !CGM.getContext().getTargetInfo().isTLSSupported());
1723   // Lookup the entry, lazily creating it if necessary.
1724   std::string Suffix = getName({"cache", ""});
1725   return getOrCreateInternalVariable(
1726       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1727 }
1728 
1729 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1730                                                 const VarDecl *VD,
1731                                                 Address VDAddr,
1732                                                 SourceLocation Loc) {
1733   if (CGM.getLangOpts().OpenMPUseTLS &&
1734       CGM.getContext().getTargetInfo().isTLSSupported())
1735     return VDAddr;
1736 
1737   llvm::Type *VarTy = VDAddr.getElementType();
1738   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1739                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1740                                                        CGM.Int8PtrTy),
1741                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1742                          getOrCreateThreadPrivateCache(VD)};
1743   return Address(CGF.EmitRuntimeCall(
1744                      OMPBuilder.getOrCreateRuntimeFunction(
1745                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1746                      Args),
1747                  VDAddr.getAlignment());
1748 }
1749 
1750 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1751     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1752     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1753   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1754   // library.
1755   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1756   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1757                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1758                       OMPLoc);
1759   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1760   // to register constructor/destructor for variable.
1761   llvm::Value *Args[] = {
1762       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1763       Ctor, CopyCtor, Dtor};
1764   CGF.EmitRuntimeCall(
1765       OMPBuilder.getOrCreateRuntimeFunction(
1766           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1767       Args);
1768 }
1769 
/// Emits the constructor/destructor helpers for the threadprivate variable
/// \p VD and registers them with the OpenMP runtime.
///
/// Nothing is emitted when TLS is used (OpenMPUseTLS is set and the target
/// supports TLS), when \p VD has no definition, or when the mangled name of
/// its definition is already present in ThreadPrivateWithDefinition.
///
/// \param VD          Threadprivate variable.
/// \param VDAddr      Address of the original variable.
/// \param Loc         Location used for the generated functions and calls.
/// \param PerformInit If true (and compiling C++), a constructor helper that
///                    re-emits the initializer is generated.
/// \param CGF         If non-null, the registration calls are emitted into
///                    this function; if null, a separate
///                    "__omp_threadprivate_init_" function is created.
/// \returns The separate init function when one was created, nullptr
///          otherwise.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // The insert succeeds only the first time a given mangled name is seen.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    // NOTE(review): Init is dereferenced below without a null check;
    // presumably PerformInit implies that an initializer exists - confirm
    // against the callers.
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single void* parameter: the destination to initialize.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      // View the incoming void* as a pointer to the variable's memory type,
      // using the alignment of the original variable.
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the parameter and store it as the function's return value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single void* parameter: the object to destroy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever of the constructor/destructor was not generated is passed as
    // a null pointer of the matching function pointer type.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function to emit into: wrap the registration calls in a dedicated
      // nullary init function and hand that back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration calls directly into the given function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1889 
/// Emits and registers the offload constructor/destructor entries for the
/// declare target variable \p VD.
///
/// Returns false right away when there are no offload target triples and this
/// is not a device compilation. On every other path the return value is
/// OpenMPIsDevice. Variables that are not declare target, are mapped with
/// 'link', are mapped with 'to' while HasRequiresUnifiedSharedMemory is set,
/// or whose mangled name is already in DeclareTargetWithDefinition get no
/// entries.
///
/// \param VD          Declare target variable; must have a definition once
///                    the early exits above have been passed.
/// \param Addr        Global that holds the variable.
/// \param PerformInit If true (and compiling C++), a constructor entry is
///                    registered.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Handle each mangled name only once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    // Resulting prefix (IDs in hexadecimal):
    //   __omp_offloading__<DeviceID>_<FileID>_<name>_l<Line>
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  // NOTE(review): Init is dereferenced in the device path below without a
  // null check; presumably PerformInit implies that an initializer exists -
  // confirm against the callers.
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the declare target variable VD (the global passed in as Addr).
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host the entry is a private constant i8 zero global with the
      // same "_ctor" name; it also serves as the entry ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the declare target
      // variable VD (the global passed in as Addr).
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // On the host the entry is a private constant i8 zero global with the
      // same "_dtor" name; it also serves as the entry ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2004 
2005 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2006                                                           QualType VarType,
2007                                                           StringRef Name) {
2008   std::string Suffix = getName({"artificial", ""});
2009   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2010   llvm::Value *GAddr =
2011       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2012   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
2013       CGM.getTarget().isTLSSupported()) {
2014     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
2015     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
2016   }
2017   std::string CacheSuffix = getName({"cache", ""});
2018   llvm::Value *Args[] = {
2019       emitUpdateLocation(CGF, SourceLocation()),
2020       getThreadID(CGF, SourceLocation()),
2021       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2022       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2023                                 /*isSigned=*/false),
2024       getOrCreateInternalVariable(
2025           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2026   return Address(
2027       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2028           CGF.EmitRuntimeCall(
2029               OMPBuilder.getOrCreateRuntimeFunction(
2030                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2031               Args),
2032           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2033       CGM.getContext().getTypeAlignInChars(VarType));
2034 }
2035 
/// Emits code for an OpenMP 'if' clause: \p ThenGen runs when \p Cond is
/// true and \p ElseGen when it is false.
///
/// If \p Cond constant-folds to a simple integer, only the selected generator
/// is invoked and no branch is emitted. Otherwise a conditional branch to the
/// "omp_if.then" / "omp_if.else" blocks is emitted, and both arms continue in
/// "omp_if.end".
///
/// \param CGF     Function in which the code is emitted.
/// \param Cond    Condition expression of the clause.
/// \param ThenGen Generator for the true arm.
/// \param ElseGen Generator for the false arm.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): the object returned by CreateEmpty is discarded right away
  // here and below, whereas other uses in this file keep it in a named local
  // for the duration of a scope. Confirm that the empty debug location
  // actually covers the branch it is meant for.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
2074 
2075 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2076                                        llvm::Function *OutlinedFn,
2077                                        ArrayRef<llvm::Value *> CapturedVars,
2078                                        const Expr *IfCond) {
2079   if (!CGF.HaveInsertPoint())
2080     return;
2081   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2082   auto &M = CGM.getModule();
2083   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2084                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2085     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2086     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2087     llvm::Value *Args[] = {
2088         RTLoc,
2089         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2090         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2091     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2092     RealArgs.append(std::begin(Args), std::end(Args));
2093     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2094 
2095     llvm::FunctionCallee RTLFn =
2096         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2097     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2098   };
2099   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2100                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2101     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2102     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2103     // Build calls:
2104     // __kmpc_serialized_parallel(&Loc, GTid);
2105     llvm::Value *Args[] = {RTLoc, ThreadID};
2106     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2107                             M, OMPRTL___kmpc_serialized_parallel),
2108                         Args);
2109 
2110     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2111     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2112     Address ZeroAddrBound =
2113         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2114                                          /*Name=*/".bound.zero.addr");
2115     CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
2116     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2117     // ThreadId for serialized parallels is 0.
2118     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2119     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2120     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2121 
2122     // Ensure we do not inline the function. This is trivially true for the ones
2123     // passed to __kmpc_fork_call but the ones called in serialized regions
2124     // could be inlined. This is not a perfect but it is closer to the invariant
2125     // we want, namely, every data environment starts with a new function.
2126     // TODO: We should pass the if condition to the runtime function and do the
2127     //       handling there. Much cleaner code.
2128     OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2129     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2130     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2131 
2132     // __kmpc_end_serialized_parallel(&Loc, GTid);
2133     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2134     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2135                             M, OMPRTL___kmpc_end_serialized_parallel),
2136                         EndArgs);
2137   };
2138   if (IfCond) {
2139     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2140   } else {
2141     RegionCodeGenTy ThenRCG(ThenGen);
2142     ThenRCG(CGF);
2143   }
2144 }
2145 
2146 // If we're inside an (outlined) parallel region, use the region info's
2147 // thread-ID variable (it is passed in a first argument of the outlined function
2148 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2149 // regular serial code region, get thread ID by calling kmp_int32
2150 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2151 // return the address of that temp.
2152 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2153                                              SourceLocation Loc) {
2154   if (auto *OMPRegionInfo =
2155           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2156     if (OMPRegionInfo->getThreadIDVariable())
2157       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2158 
2159   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2160   QualType Int32Ty =
2161       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2162   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2163   CGF.EmitStoreOfScalar(ThreadID,
2164                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2165 
2166   return ThreadIDTemp;
2167 }
2168 
2169 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2170     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2171   SmallString<256> Buffer;
2172   llvm::raw_svector_ostream Out(Buffer);
2173   Out << Name;
2174   StringRef RuntimeName = Out.str();
2175   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2176   if (Elem.second) {
2177     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2178            "OMP internal variable has different type than requested");
2179     return &*Elem.second;
2180   }
2181 
2182   return Elem.second = new llvm::GlobalVariable(
2183              CGM.getModule(), Ty, /*IsConstant*/ false,
2184              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2185              Elem.first(), /*InsertBefore=*/nullptr,
2186              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2187 }
2188 
2189 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2190   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2191   std::string Name = getName({Prefix, "var"});
2192   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2193 }
2194 
2195 namespace {
2196 /// Common pre(post)-action for different OpenMP constructs.
2197 class CommonActionTy final : public PrePostActionTy {
2198   llvm::FunctionCallee EnterCallee;
2199   ArrayRef<llvm::Value *> EnterArgs;
2200   llvm::FunctionCallee ExitCallee;
2201   ArrayRef<llvm::Value *> ExitArgs;
2202   bool Conditional;
2203   llvm::BasicBlock *ContBlock = nullptr;
2204 
2205 public:
2206   CommonActionTy(llvm::FunctionCallee EnterCallee,
2207                  ArrayRef<llvm::Value *> EnterArgs,
2208                  llvm::FunctionCallee ExitCallee,
2209                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2210       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2211         ExitArgs(ExitArgs), Conditional(Conditional) {}
2212   void Enter(CodeGenFunction &CGF) override {
2213     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2214     if (Conditional) {
2215       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2216       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2217       ContBlock = CGF.createBasicBlock("omp_if.end");
2218       // Generate the branch (If-stmt)
2219       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2220       CGF.EmitBlock(ThenBlock);
2221     }
2222   }
2223   void Done(CodeGenFunction &CGF) {
2224     // Emit the rest of blocks/branches
2225     CGF.EmitBranch(ContBlock);
2226     CGF.EmitBlock(ContBlock, true);
2227   }
2228   void Exit(CodeGenFunction &CGF) override {
2229     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2230   }
2231 };
2232 } // anonymous namespace
2233 
2234 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2235                                          StringRef CriticalName,
2236                                          const RegionCodeGenTy &CriticalOpGen,
2237                                          SourceLocation Loc, const Expr *Hint) {
2238   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2239   // CriticalOpGen();
2240   // __kmpc_end_critical(ident_t *, gtid, Lock);
2241   // Prepare arguments and build a call to __kmpc_critical
2242   if (!CGF.HaveInsertPoint())
2243     return;
2244   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2245                          getCriticalRegionLock(CriticalName)};
2246   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2247                                                 std::end(Args));
2248   if (Hint) {
2249     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2250         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2251   }
2252   CommonActionTy Action(
2253       OMPBuilder.getOrCreateRuntimeFunction(
2254           CGM.getModule(),
2255           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2256       EnterArgs,
2257       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2258                                             OMPRTL___kmpc_end_critical),
2259       Args);
2260   CriticalOpGen.setAction(Action);
2261   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2262 }
2263 
2264 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2265                                        const RegionCodeGenTy &MasterOpGen,
2266                                        SourceLocation Loc) {
2267   if (!CGF.HaveInsertPoint())
2268     return;
2269   // if(__kmpc_master(ident_t *, gtid)) {
2270   //   MasterOpGen();
2271   //   __kmpc_end_master(ident_t *, gtid);
2272   // }
2273   // Prepare arguments and build a call to __kmpc_master
2274   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2275   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2276                             CGM.getModule(), OMPRTL___kmpc_master),
2277                         Args,
2278                         OMPBuilder.getOrCreateRuntimeFunction(
2279                             CGM.getModule(), OMPRTL___kmpc_end_master),
2280                         Args,
2281                         /*Conditional=*/true);
2282   MasterOpGen.setAction(Action);
2283   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2284   Action.Done(CGF);
2285 }
2286 
2287 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2288                                        const RegionCodeGenTy &MaskedOpGen,
2289                                        SourceLocation Loc, const Expr *Filter) {
2290   if (!CGF.HaveInsertPoint())
2291     return;
2292   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2293   //   MaskedOpGen();
2294   //   __kmpc_end_masked(iden_t *, gtid);
2295   // }
2296   // Prepare arguments and build a call to __kmpc_masked
2297   llvm::Value *FilterVal = Filter
2298                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2299                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2300   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2301                          FilterVal};
2302   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2303                             getThreadID(CGF, Loc)};
2304   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2305                             CGM.getModule(), OMPRTL___kmpc_masked),
2306                         Args,
2307                         OMPBuilder.getOrCreateRuntimeFunction(
2308                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2309                         ArgsEnd,
2310                         /*Conditional=*/true);
2311   MaskedOpGen.setAction(Action);
2312   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2313   Action.Done(CGF);
2314 }
2315 
2316 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2317                                         SourceLocation Loc) {
2318   if (!CGF.HaveInsertPoint())
2319     return;
2320   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2321     OMPBuilder.createTaskyield(CGF.Builder);
2322   } else {
2323     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2324     llvm::Value *Args[] = {
2325         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2326         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2327     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2328                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2329                         Args);
2330   }
2331 
2332   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2333     Region->emitUntiedSwitch(CGF);
2334 }
2335 
2336 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2337                                           const RegionCodeGenTy &TaskgroupOpGen,
2338                                           SourceLocation Loc) {
2339   if (!CGF.HaveInsertPoint())
2340     return;
2341   // __kmpc_taskgroup(ident_t *, gtid);
2342   // TaskgroupOpGen();
2343   // __kmpc_end_taskgroup(ident_t *, gtid);
2344   // Prepare arguments and build a call to __kmpc_taskgroup
2345   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2346   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2347                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2348                         Args,
2349                         OMPBuilder.getOrCreateRuntimeFunction(
2350                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2351                         Args);
2352   TaskgroupOpGen.setAction(Action);
2353   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2354 }
2355 
2356 /// Given an array of pointers to variables, project the address of a
2357 /// given variable.
2358 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2359                                       unsigned Index, const VarDecl *Var) {
2360   // Pull out the pointer to the variable.
2361   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2362   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2363 
2364   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2365   Addr = CGF.Builder.CreateElementBitCast(
2366       Addr, CGF.ConvertTypeForMem(Var->getType()));
2367   return Addr;
2368 }
2369 
2370 static llvm::Value *emitCopyprivateCopyFunction(
2371     CodeGenModule &CGM, llvm::Type *ArgsType,
2372     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2373     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2374     SourceLocation Loc) {
2375   ASTContext &C = CGM.getContext();
2376   // void copy_func(void *LHSArg, void *RHSArg);
2377   FunctionArgList Args;
2378   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2379                            ImplicitParamDecl::Other);
2380   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2381                            ImplicitParamDecl::Other);
2382   Args.push_back(&LHSArg);
2383   Args.push_back(&RHSArg);
2384   const auto &CGFI =
2385       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2386   std::string Name =
2387       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2388   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2389                                     llvm::GlobalValue::InternalLinkage, Name,
2390                                     &CGM.getModule());
2391   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2392   Fn->setDoesNotRecurse();
2393   CodeGenFunction CGF(CGM);
2394   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2395   // Dest = (void*[n])(LHSArg);
2396   // Src = (void*[n])(RHSArg);
2397   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2398       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2399       ArgsType), CGF.getPointerAlign());
2400   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2401       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2402       ArgsType), CGF.getPointerAlign());
2403   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2404   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2405   // ...
2406   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2407   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2408     const auto *DestVar =
2409         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2410     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2411 
2412     const auto *SrcVar =
2413         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2414     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2415 
2416     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2417     QualType Type = VD->getType();
2418     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2419   }
2420   CGF.FinishFunction();
2421   return Fn;
2422 }
2423 
2424 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2425                                        const RegionCodeGenTy &SingleOpGen,
2426                                        SourceLocation Loc,
2427                                        ArrayRef<const Expr *> CopyprivateVars,
2428                                        ArrayRef<const Expr *> SrcExprs,
2429                                        ArrayRef<const Expr *> DstExprs,
2430                                        ArrayRef<const Expr *> AssignmentOps) {
2431   if (!CGF.HaveInsertPoint())
2432     return;
2433   assert(CopyprivateVars.size() == SrcExprs.size() &&
2434          CopyprivateVars.size() == DstExprs.size() &&
2435          CopyprivateVars.size() == AssignmentOps.size());
2436   ASTContext &C = CGM.getContext();
2437   // int32 did_it = 0;
2438   // if(__kmpc_single(ident_t *, gtid)) {
2439   //   SingleOpGen();
2440   //   __kmpc_end_single(ident_t *, gtid);
2441   //   did_it = 1;
2442   // }
2443   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2444   // <copy_func>, did_it);
2445 
2446   Address DidIt = Address::invalid();
2447   if (!CopyprivateVars.empty()) {
2448     // int32 did_it = 0;
2449     QualType KmpInt32Ty =
2450         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2451     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2452     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2453   }
2454   // Prepare arguments and build a call to __kmpc_single
2455   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2456   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2457                             CGM.getModule(), OMPRTL___kmpc_single),
2458                         Args,
2459                         OMPBuilder.getOrCreateRuntimeFunction(
2460                             CGM.getModule(), OMPRTL___kmpc_end_single),
2461                         Args,
2462                         /*Conditional=*/true);
2463   SingleOpGen.setAction(Action);
2464   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2465   if (DidIt.isValid()) {
2466     // did_it = 1;
2467     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2468   }
2469   Action.Done(CGF);
2470   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2471   // <copy_func>, did_it);
2472   if (DidIt.isValid()) {
2473     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2474     QualType CopyprivateArrayTy = C.getConstantArrayType(
2475         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2476         /*IndexTypeQuals=*/0);
2477     // Create a list of all private variables for copyprivate.
2478     Address CopyprivateList =
2479         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2480     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2481       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2482       CGF.Builder.CreateStore(
2483           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2484               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2485               CGF.VoidPtrTy),
2486           Elem);
2487     }
2488     // Build function that copies private values from single region to all other
2489     // threads in the corresponding parallel region.
2490     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2491         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2492         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2493     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2494     Address CL =
2495       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2496                                                       CGF.VoidPtrTy);
2497     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2498     llvm::Value *Args[] = {
2499         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2500         getThreadID(CGF, Loc),        // i32 <gtid>
2501         BufSize,                      // size_t <buf_size>
2502         CL.getPointer(),              // void *<copyprivate list>
2503         CpyFn,                        // void (*) (void *, void *) <copy_func>
2504         DidItVal                      // i32 did_it
2505     };
2506     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2507                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2508                         Args);
2509   }
2510 }
2511 
2512 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2513                                         const RegionCodeGenTy &OrderedOpGen,
2514                                         SourceLocation Loc, bool IsThreads) {
2515   if (!CGF.HaveInsertPoint())
2516     return;
2517   // __kmpc_ordered(ident_t *, gtid);
2518   // OrderedOpGen();
2519   // __kmpc_end_ordered(ident_t *, gtid);
2520   // Prepare arguments and build a call to __kmpc_ordered
2521   if (IsThreads) {
2522     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2523     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2524                               CGM.getModule(), OMPRTL___kmpc_ordered),
2525                           Args,
2526                           OMPBuilder.getOrCreateRuntimeFunction(
2527                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2528                           Args);
2529     OrderedOpGen.setAction(Action);
2530     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2531     return;
2532   }
2533   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2534 }
2535 
2536 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2537   unsigned Flags;
2538   if (Kind == OMPD_for)
2539     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2540   else if (Kind == OMPD_sections)
2541     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2542   else if (Kind == OMPD_single)
2543     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2544   else if (Kind == OMPD_barrier)
2545     Flags = OMP_IDENT_BARRIER_EXPL;
2546   else
2547     Flags = OMP_IDENT_BARRIER_IMPL;
2548   return Flags;
2549 }
2550 
2551 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2552     CodeGenFunction &CGF, const OMPLoopDirective &S,
2553     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2554   // Check if the loop directive is actually a doacross loop directive. In this
2555   // case choose static, 1 schedule.
2556   if (llvm::any_of(
2557           S.getClausesOfKind<OMPOrderedClause>(),
2558           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2559     ScheduleKind = OMPC_SCHEDULE_static;
2560     // Chunk size is 1 in this case.
2561     llvm::APInt ChunkSize(32, 1);
2562     ChunkExpr = IntegerLiteral::Create(
2563         CGF.getContext(), ChunkSize,
2564         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2565         SourceLocation());
2566   }
2567 }
2568 
2569 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2570                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2571                                       bool ForceSimpleCall) {
2572   // Check if we should use the OMPBuilder
2573   auto *OMPRegionInfo =
2574       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2575   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2576     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2577         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2578     return;
2579   }
2580 
2581   if (!CGF.HaveInsertPoint())
2582     return;
2583   // Build call __kmpc_cancel_barrier(loc, thread_id);
2584   // Build call __kmpc_barrier(loc, thread_id);
2585   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2586   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2587   // thread_id);
2588   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2589                          getThreadID(CGF, Loc)};
2590   if (OMPRegionInfo) {
2591     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2592       llvm::Value *Result = CGF.EmitRuntimeCall(
2593           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2594                                                 OMPRTL___kmpc_cancel_barrier),
2595           Args);
2596       if (EmitChecks) {
2597         // if (__kmpc_cancel_barrier()) {
2598         //   exit from construct;
2599         // }
2600         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2601         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2602         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2603         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2604         CGF.EmitBlock(ExitBB);
2605         //   exit from construct;
2606         CodeGenFunction::JumpDest CancelDestination =
2607             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2608         CGF.EmitBranchThroughCleanup(CancelDestination);
2609         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2610       }
2611       return;
2612     }
2613   }
2614   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2615                           CGM.getModule(), OMPRTL___kmpc_barrier),
2616                       Args);
2617 }
2618 
2619 /// Map the OpenMP loop schedule to the runtime enumeration.
2620 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2621                                           bool Chunked, bool Ordered) {
2622   switch (ScheduleKind) {
2623   case OMPC_SCHEDULE_static:
2624     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2625                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2626   case OMPC_SCHEDULE_dynamic:
2627     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2628   case OMPC_SCHEDULE_guided:
2629     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2630   case OMPC_SCHEDULE_runtime:
2631     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2632   case OMPC_SCHEDULE_auto:
2633     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2634   case OMPC_SCHEDULE_unknown:
2635     assert(!Chunked && "chunk was specified but schedule kind not known");
2636     return Ordered ? OMP_ord_static : OMP_sch_static;
2637   }
2638   llvm_unreachable("Unexpected runtime schedule");
2639 }
2640 
2641 /// Map the OpenMP distribute schedule to the runtime enumeration.
2642 static OpenMPSchedType
2643 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2644   // only static is allowed for dist_schedule
2645   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2646 }
2647 
2648 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2649                                          bool Chunked) const {
2650   OpenMPSchedType Schedule =
2651       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2652   return Schedule == OMP_sch_static;
2653 }
2654 
2655 bool CGOpenMPRuntime::isStaticNonchunked(
2656     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2657   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2658   return Schedule == OMP_dist_sch_static;
2659 }
2660 
2661 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2662                                       bool Chunked) const {
2663   OpenMPSchedType Schedule =
2664       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2665   return Schedule == OMP_sch_static_chunked;
2666 }
2667 
2668 bool CGOpenMPRuntime::isStaticChunked(
2669     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2670   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2671   return Schedule == OMP_dist_sch_static_chunked;
2672 }
2673 
2674 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2675   OpenMPSchedType Schedule =
2676       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2677   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2678   return Schedule != OMP_sch_static;
2679 }
2680 
2681 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2682                                   OpenMPScheduleClauseModifier M1,
2683                                   OpenMPScheduleClauseModifier M2) {
2684   int Modifier = 0;
2685   switch (M1) {
2686   case OMPC_SCHEDULE_MODIFIER_monotonic:
2687     Modifier = OMP_sch_modifier_monotonic;
2688     break;
2689   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2690     Modifier = OMP_sch_modifier_nonmonotonic;
2691     break;
2692   case OMPC_SCHEDULE_MODIFIER_simd:
2693     if (Schedule == OMP_sch_static_chunked)
2694       Schedule = OMP_sch_static_balanced_chunked;
2695     break;
2696   case OMPC_SCHEDULE_MODIFIER_last:
2697   case OMPC_SCHEDULE_MODIFIER_unknown:
2698     break;
2699   }
2700   switch (M2) {
2701   case OMPC_SCHEDULE_MODIFIER_monotonic:
2702     Modifier = OMP_sch_modifier_monotonic;
2703     break;
2704   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2705     Modifier = OMP_sch_modifier_nonmonotonic;
2706     break;
2707   case OMPC_SCHEDULE_MODIFIER_simd:
2708     if (Schedule == OMP_sch_static_chunked)
2709       Schedule = OMP_sch_static_balanced_chunked;
2710     break;
2711   case OMPC_SCHEDULE_MODIFIER_last:
2712   case OMPC_SCHEDULE_MODIFIER_unknown:
2713     break;
2714   }
2715   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2716   // If the static schedule kind is specified or if the ordered clause is
2717   // specified, and if the nonmonotonic modifier is not specified, the effect is
2718   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2719   // modifier is specified, the effect is as if the nonmonotonic modifier is
2720   // specified.
2721   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2722     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2723           Schedule == OMP_sch_static_balanced_chunked ||
2724           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2725           Schedule == OMP_dist_sch_static_chunked ||
2726           Schedule == OMP_dist_sch_static))
2727       Modifier = OMP_sch_modifier_nonmonotonic;
2728   }
2729   return Schedule | Modifier;
2730 }
2731 
2732 void CGOpenMPRuntime::emitForDispatchInit(
2733     CodeGenFunction &CGF, SourceLocation Loc,
2734     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2735     bool Ordered, const DispatchRTInput &DispatchValues) {
2736   if (!CGF.HaveInsertPoint())
2737     return;
2738   OpenMPSchedType Schedule = getRuntimeSchedule(
2739       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2740   assert(Ordered ||
2741          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2742           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2743           Schedule != OMP_sch_static_balanced_chunked));
2744   // Call __kmpc_dispatch_init(
2745   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2746   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2747   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2748 
2749   // If the Chunk was not specified in the clause - use default value 1.
2750   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2751                                             : CGF.Builder.getIntN(IVSize, 1);
2752   llvm::Value *Args[] = {
2753       emitUpdateLocation(CGF, Loc),
2754       getThreadID(CGF, Loc),
2755       CGF.Builder.getInt32(addMonoNonMonoModifier(
2756           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2757       DispatchValues.LB,                                     // Lower
2758       DispatchValues.UB,                                     // Upper
2759       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2760       Chunk                                                  // Chunk
2761   };
2762   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2763 }
2764 
2765 static void emitForStaticInitCall(
2766     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2767     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2768     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2769     const CGOpenMPRuntime::StaticRTInput &Values) {
2770   if (!CGF.HaveInsertPoint())
2771     return;
2772 
2773   assert(!Values.Ordered);
2774   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2775          Schedule == OMP_sch_static_balanced_chunked ||
2776          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2777          Schedule == OMP_dist_sch_static ||
2778          Schedule == OMP_dist_sch_static_chunked);
2779 
2780   // Call __kmpc_for_static_init(
2781   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2782   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2783   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2784   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2785   llvm::Value *Chunk = Values.Chunk;
2786   if (Chunk == nullptr) {
2787     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2788             Schedule == OMP_dist_sch_static) &&
2789            "expected static non-chunked schedule");
2790     // If the Chunk was not specified in the clause - use default value 1.
2791     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2792   } else {
2793     assert((Schedule == OMP_sch_static_chunked ||
2794             Schedule == OMP_sch_static_balanced_chunked ||
2795             Schedule == OMP_ord_static_chunked ||
2796             Schedule == OMP_dist_sch_static_chunked) &&
2797            "expected static chunked schedule");
2798   }
2799   llvm::Value *Args[] = {
2800       UpdateLocation,
2801       ThreadId,
2802       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2803                                                   M2)), // Schedule type
2804       Values.IL.getPointer(),                           // &isLastIter
2805       Values.LB.getPointer(),                           // &LB
2806       Values.UB.getPointer(),                           // &UB
2807       Values.ST.getPointer(),                           // &Stride
2808       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2809       Chunk                                             // Chunk
2810   };
2811   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2812 }
2813 
2814 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2815                                         SourceLocation Loc,
2816                                         OpenMPDirectiveKind DKind,
2817                                         const OpenMPScheduleTy &ScheduleKind,
2818                                         const StaticRTInput &Values) {
2819   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2820       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2821   assert(isOpenMPWorksharingDirective(DKind) &&
2822          "Expected loop-based or sections-based directive.");
2823   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2824                                              isOpenMPLoopDirective(DKind)
2825                                                  ? OMP_IDENT_WORK_LOOP
2826                                                  : OMP_IDENT_WORK_SECTIONS);
2827   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2828   llvm::FunctionCallee StaticInitFunction =
2829       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2830   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2831   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2832                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2833 }
2834 
2835 void CGOpenMPRuntime::emitDistributeStaticInit(
2836     CodeGenFunction &CGF, SourceLocation Loc,
2837     OpenMPDistScheduleClauseKind SchedKind,
2838     const CGOpenMPRuntime::StaticRTInput &Values) {
2839   OpenMPSchedType ScheduleNum =
2840       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2841   llvm::Value *UpdatedLocation =
2842       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2843   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2844   llvm::FunctionCallee StaticInitFunction =
2845       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2846   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2847                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2848                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2849 }
2850 
2851 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2852                                           SourceLocation Loc,
2853                                           OpenMPDirectiveKind DKind) {
2854   if (!CGF.HaveInsertPoint())
2855     return;
2856   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2857   llvm::Value *Args[] = {
2858       emitUpdateLocation(CGF, Loc,
2859                          isOpenMPDistributeDirective(DKind)
2860                              ? OMP_IDENT_WORK_DISTRIBUTE
2861                              : isOpenMPLoopDirective(DKind)
2862                                    ? OMP_IDENT_WORK_LOOP
2863                                    : OMP_IDENT_WORK_SECTIONS),
2864       getThreadID(CGF, Loc)};
2865   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2866   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2867                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2868                       Args);
2869 }
2870 
2871 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2872                                                  SourceLocation Loc,
2873                                                  unsigned IVSize,
2874                                                  bool IVSigned) {
2875   if (!CGF.HaveInsertPoint())
2876     return;
2877   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2878   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2879   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2880 }
2881 
2882 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2883                                           SourceLocation Loc, unsigned IVSize,
2884                                           bool IVSigned, Address IL,
2885                                           Address LB, Address UB,
2886                                           Address ST) {
2887   // Call __kmpc_dispatch_next(
2888   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2889   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2890   //          kmp_int[32|64] *p_stride);
2891   llvm::Value *Args[] = {
2892       emitUpdateLocation(CGF, Loc),
2893       getThreadID(CGF, Loc),
2894       IL.getPointer(), // &isLastIter
2895       LB.getPointer(), // &Lower
2896       UB.getPointer(), // &Upper
2897       ST.getPointer()  // &Stride
2898   };
2899   llvm::Value *Call =
2900       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2901   return CGF.EmitScalarConversion(
2902       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2903       CGF.getContext().BoolTy, Loc);
2904 }
2905 
2906 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2907                                            llvm::Value *NumThreads,
2908                                            SourceLocation Loc) {
2909   if (!CGF.HaveInsertPoint())
2910     return;
2911   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2912   llvm::Value *Args[] = {
2913       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2914       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2915   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2916                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2917                       Args);
2918 }
2919 
2920 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2921                                          ProcBindKind ProcBind,
2922                                          SourceLocation Loc) {
2923   if (!CGF.HaveInsertPoint())
2924     return;
2925   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2926   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2927   llvm::Value *Args[] = {
2928       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2929       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2930   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2931                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2932                       Args);
2933 }
2934 
2935 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2936                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2937   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2938     OMPBuilder.createFlush(CGF.Builder);
2939   } else {
2940     if (!CGF.HaveInsertPoint())
2941       return;
2942     // Build call void __kmpc_flush(ident_t *loc)
2943     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2944                             CGM.getModule(), OMPRTL___kmpc_flush),
2945                         emitUpdateLocation(CGF, Loc));
2946   }
2947 }
2948 
namespace {
/// Indexes of fields for type kmp_task_t. The values are spelled out because
/// they are positions inside the record; they must stay consecutive.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds = 0,
  /// Task routine.
  KmpTaskTRoutine = 1,
  /// Partition id for the untied tasks.
  KmpTaskTPartId = 2,
  /// Function with call of destructors for private variables.
  Data1 = 3,
  /// Task priority.
  Data2 = 4,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound = 5,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound = 6,
  /// (Taskloops only) Stride.
  KmpTaskTStride = 7,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter = 8,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions = 9,
};
} // anonymous namespace
2974 
2975 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2976   return OffloadEntriesTargetRegion.empty() &&
2977          OffloadEntriesDeviceGlobalVar.empty();
2978 }
2979 
2980 /// Initialize target region entry.
2981 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2982     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2983                                     StringRef ParentName, unsigned LineNum,
2984                                     unsigned Order) {
2985   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2986                                              "only required for the device "
2987                                              "code generation.");
2988   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2989       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2990                                    OMPTargetRegionEntryTargetRegion);
2991   ++OffloadingEntriesNum;
2992 }
2993 
2994 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2995     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2996                                   StringRef ParentName, unsigned LineNum,
2997                                   llvm::Constant *Addr, llvm::Constant *ID,
2998                                   OMPTargetRegionEntryKind Flags) {
2999   // If we are emitting code for a target, the entry is already initialized,
3000   // only has to be registered.
3001   if (CGM.getLangOpts().OpenMPIsDevice) {
3002     // This could happen if the device compilation is invoked standalone.
3003     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
3004       return;
3005     auto &Entry =
3006         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3007     Entry.setAddress(Addr);
3008     Entry.setID(ID);
3009     Entry.setFlags(Flags);
3010   } else {
3011     if (Flags ==
3012             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3013         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3014                                  /*IgnoreAddressId*/ true))
3015       return;
3016     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3017            "Target region entry already registered!");
3018     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3019     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3020     ++OffloadingEntriesNum;
3021   }
3022 }
3023 
3024 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3025     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3026     bool IgnoreAddressId) const {
3027   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3028   if (PerDevice == OffloadEntriesTargetRegion.end())
3029     return false;
3030   auto PerFile = PerDevice->second.find(FileID);
3031   if (PerFile == PerDevice->second.end())
3032     return false;
3033   auto PerParentName = PerFile->second.find(ParentName);
3034   if (PerParentName == PerFile->second.end())
3035     return false;
3036   auto PerLine = PerParentName->second.find(LineNum);
3037   if (PerLine == PerParentName->second.end())
3038     return false;
3039   // Fail if this entry is already registered.
3040   if (!IgnoreAddressId &&
3041       (PerLine->second.getAddress() || PerLine->second.getID()))
3042     return false;
3043   return true;
3044 }
3045 
3046 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3047     const OffloadTargetRegionEntryInfoActTy &Action) {
3048   // Scan all target region entries and perform the provided action.
3049   for (const auto &D : OffloadEntriesTargetRegion)
3050     for (const auto &F : D.second)
3051       for (const auto &P : F.second)
3052         for (const auto &L : P.second)
3053           Action(D.first, F.first, P.first(), L.first, L.second);
3054 }
3055 
3056 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3057     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3058                                        OMPTargetGlobalVarEntryKind Flags,
3059                                        unsigned Order) {
3060   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3061                                              "only required for the device "
3062                                              "code generation.");
3063   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3064   ++OffloadingEntriesNum;
3065 }
3066 
3067 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3068     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3069                                      CharUnits VarSize,
3070                                      OMPTargetGlobalVarEntryKind Flags,
3071                                      llvm::GlobalValue::LinkageTypes Linkage) {
3072   if (CGM.getLangOpts().OpenMPIsDevice) {
3073     // This could happen if the device compilation is invoked standalone.
3074     if (!hasDeviceGlobalVarEntryInfo(VarName))
3075       return;
3076     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3077     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3078       if (Entry.getVarSize().isZero()) {
3079         Entry.setVarSize(VarSize);
3080         Entry.setLinkage(Linkage);
3081       }
3082       return;
3083     }
3084     Entry.setVarSize(VarSize);
3085     Entry.setLinkage(Linkage);
3086     Entry.setAddress(Addr);
3087   } else {
3088     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3089       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3090       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3091              "Entry not initialized!");
3092       if (Entry.getVarSize().isZero()) {
3093         Entry.setVarSize(VarSize);
3094         Entry.setLinkage(Linkage);
3095       }
3096       return;
3097     }
3098     OffloadEntriesDeviceGlobalVar.try_emplace(
3099         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3100     ++OffloadingEntriesNum;
3101   }
3102 }
3103 
3104 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3105     actOnDeviceGlobalVarEntriesInfo(
3106         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3107   // Scan all target region entries and perform the provided action.
3108   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3109     Action(E.getKey(), E.getValue());
3110 }
3111 
3112 void CGOpenMPRuntime::createOffloadEntry(
3113     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3114     llvm::GlobalValue::LinkageTypes Linkage) {
3115   StringRef Name = Addr->getName();
3116   llvm::Module &M = CGM.getModule();
3117   llvm::LLVMContext &C = M.getContext();
3118 
3119   // Create constant string with the name.
3120   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3121 
3122   std::string StringName = getName({"omp_offloading", "entry_name"});
3123   auto *Str = new llvm::GlobalVariable(
3124       M, StrPtrInit->getType(), /*isConstant=*/true,
3125       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3126   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3127 
3128   llvm::Constant *Data[] = {
3129       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3130       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3131       llvm::ConstantInt::get(CGM.SizeTy, Size),
3132       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3133       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3134   std::string EntryName = getName({"omp_offloading", "entry", ""});
3135   llvm::GlobalVariable *Entry = createGlobalStruct(
3136       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3137       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3138 
3139   // The entry has to be created in the section the linker expects it to be.
3140   Entry->setSection("omp_offloading_entries");
3141 }
3142 
/// Emit the "omp_offload.info" named metadata and the offload entry records
/// for everything recorded in OffloadEntriesInfoManager.
///
/// The work is done in three passes: one metadata node per target region
/// entry, one per device global variable entry, and finally a walk over all
/// entries in creation order that either reports a diagnostic, skips the
/// entry, or calls createOffloadEntry() for it.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Both arrays are indexed by the entry's creation order; they are filled in
  // by the two metadata emitters below.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Find the file with a matching (device, file) unique ID to build a
        // source location for diagnostics; Loc stays invalid if none matches.
        // Note: the iterator named E below shadows the entry parameter E for
        // the duration of the loop only.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Walk the entries in creation order and emit the actual entry records.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      // Each case either 'continue's (no entry is emitted for this variable)
      // or 'break's to reach the createOffloadEntry() call after the switch.
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // No entry is emitted on the device when unified shared memory is
        // required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // The assertion requires the address to be unset on the device and
        // set on the host.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        // Still diagnosed at run time when assertions are compiled out.
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For variables the address doubles as the entry ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3316 
3317 /// Loads all the offload entries information from the host IR
3318 /// metadata.
3319 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3320   // If we are in target mode, load the metadata from the host IR. This code has
3321   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3322 
3323   if (!CGM.getLangOpts().OpenMPIsDevice)
3324     return;
3325 
3326   if (CGM.getLangOpts().OMPHostIRFile.empty())
3327     return;
3328 
3329   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3330   if (auto EC = Buf.getError()) {
3331     CGM.getDiags().Report(diag::err_cannot_open_file)
3332         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3333     return;
3334   }
3335 
3336   llvm::LLVMContext C;
3337   auto ME = expectedToErrorOrAndEmitErrors(
3338       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3339 
3340   if (auto EC = ME.getError()) {
3341     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3342         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3343     CGM.getDiags().Report(DiagID)
3344         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3345     return;
3346   }
3347 
3348   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3349   if (!MD)
3350     return;
3351 
3352   for (llvm::MDNode *MN : MD->operands()) {
3353     auto &&GetMDInt = [MN](unsigned Idx) {
3354       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3355       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3356     };
3357 
3358     auto &&GetMDString = [MN](unsigned Idx) {
3359       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3360       return V->getString();
3361     };
3362 
3363     switch (GetMDInt(0)) {
3364     default:
3365       llvm_unreachable("Unexpected metadata!");
3366       break;
3367     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3368         OffloadingEntryInfoTargetRegion:
3369       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3370           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3371           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3372           /*Order=*/GetMDInt(5));
3373       break;
3374     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3375         OffloadingEntryInfoDeviceGlobalVar:
3376       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3377           /*MangledName=*/GetMDString(1),
3378           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3379               /*Flags=*/GetMDInt(2)),
3380           /*Order=*/GetMDInt(3));
3381       break;
3382     }
3383   }
3384 }
3385 
3386 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3387   if (!KmpRoutineEntryPtrTy) {
3388     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3389     ASTContext &C = CGM.getContext();
3390     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3391     FunctionProtoType::ExtProtoInfo EPI;
3392     KmpRoutineEntryPtrQTy = C.getPointerType(
3393         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3394     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3395   }
3396 }
3397 
3398 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3399   // Make sure the type of the entry is already created. This is the type we
3400   // have to create:
3401   // struct __tgt_offload_entry{
3402   //   void      *addr;       // Pointer to the offload entry info.
3403   //                          // (function or global)
3404   //   char      *name;       // Name of the function or global.
3405   //   size_t     size;       // Size of the entry info (0 if it a function).
3406   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3407   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3408   // };
3409   if (TgtOffloadEntryQTy.isNull()) {
3410     ASTContext &C = CGM.getContext();
3411     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3412     RD->startDefinition();
3413     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3414     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3415     addFieldToRecordDecl(C, RD, C.getSizeType());
3416     addFieldToRecordDecl(
3417         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3418     addFieldToRecordDecl(
3419         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3420     RD->completeDefinition();
3421     RD->addAttr(PackedAttr::CreateImplicit(C));
3422     TgtOffloadEntryQTy = C.getRecordType(RD);
3423   }
3424   return TgtOffloadEntryQTy;
3425 }
3426 
3427 namespace {
3428 struct PrivateHelpersTy {
3429   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3430                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3431       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3432         PrivateElemInit(PrivateElemInit) {}
3433   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3434   const Expr *OriginalRef = nullptr;
3435   const VarDecl *Original = nullptr;
3436   const VarDecl *PrivateCopy = nullptr;
3437   const VarDecl *PrivateElemInit = nullptr;
3438   bool isLocalPrivate() const {
3439     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3440   }
3441 };
3442 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3443 } // anonymous namespace
3444 
3445 static bool isAllocatableDecl(const VarDecl *VD) {
3446   const VarDecl *CVD = VD->getCanonicalDecl();
3447   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3448     return false;
3449   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3450   // Use the default allocation.
3451   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3452             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3453            !AA->getAllocator());
3454 }
3455 
3456 static RecordDecl *
3457 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3458   if (!Privates.empty()) {
3459     ASTContext &C = CGM.getContext();
3460     // Build struct .kmp_privates_t. {
3461     //         /*  private vars  */
3462     //       };
3463     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3464     RD->startDefinition();
3465     for (const auto &Pair : Privates) {
3466       const VarDecl *VD = Pair.second.Original;
3467       QualType Type = VD->getType().getNonReferenceType();
3468       // If the private variable is a local variable with lvalue ref type,
3469       // allocate the pointer instead of the pointee type.
3470       if (Pair.second.isLocalPrivate()) {
3471         if (VD->getType()->isLValueReferenceType())
3472           Type = C.getPointerType(Type);
3473         if (isAllocatableDecl(VD))
3474           Type = C.getPointerType(Type);
3475       }
3476       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3477       if (VD->hasAttrs()) {
3478         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3479              E(VD->getAttrs().end());
3480              I != E; ++I)
3481           FD->addAttr(*I);
3482       }
3483     }
3484     RD->completeDefinition();
3485     return RD;
3486   }
3487   return nullptr;
3488 }
3489 
3490 static RecordDecl *
3491 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3492                          QualType KmpInt32Ty,
3493                          QualType KmpRoutineEntryPointerQTy) {
3494   ASTContext &C = CGM.getContext();
3495   // Build struct kmp_task_t {
3496   //         void *              shareds;
3497   //         kmp_routine_entry_t routine;
3498   //         kmp_int32           part_id;
3499   //         kmp_cmplrdata_t data1;
3500   //         kmp_cmplrdata_t data2;
3501   // For taskloops additional fields:
3502   //         kmp_uint64          lb;
3503   //         kmp_uint64          ub;
3504   //         kmp_int64           st;
3505   //         kmp_int32           liter;
3506   //         void *              reductions;
3507   //       };
3508   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3509   UD->startDefinition();
3510   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3511   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3512   UD->completeDefinition();
3513   QualType KmpCmplrdataTy = C.getRecordType(UD);
3514   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3515   RD->startDefinition();
3516   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3517   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3518   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3519   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3520   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3521   if (isOpenMPTaskLoopDirective(Kind)) {
3522     QualType KmpUInt64Ty =
3523         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3524     QualType KmpInt64Ty =
3525         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3526     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3527     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3528     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3529     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3530     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3531   }
3532   RD->completeDefinition();
3533   return RD;
3534 }
3535 
3536 static RecordDecl *
3537 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3538                                      ArrayRef<PrivateDataTy> Privates) {
3539   ASTContext &C = CGM.getContext();
3540   // Build struct kmp_task_t_with_privates {
3541   //         kmp_task_t task_data;
3542   //         .kmp_privates_t. privates;
3543   //       };
3544   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3545   RD->startDefinition();
3546   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3547   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3548     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3549   RD->completeDefinition();
3550   return RD;
3551 }
3552 
3553 /// Emit a proxy function which accepts kmp_task_t as the second
3554 /// argument.
3555 /// \code
3556 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3557 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3558 ///   For taskloops:
3559 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3560 ///   tt->reductions, tt->shareds);
3561 ///   return 0;
3562 /// }
3563 /// \endcode
3564 static llvm::Function *
3565 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3566                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3567                       QualType KmpTaskTWithPrivatesPtrQTy,
3568                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3569                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3570                       llvm::Value *TaskPrivatesMap) {
3571   ASTContext &C = CGM.getContext();
3572   FunctionArgList Args;
3573   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3574                             ImplicitParamDecl::Other);
3575   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3576                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3577                                 ImplicitParamDecl::Other);
3578   Args.push_back(&GtidArg);
3579   Args.push_back(&TaskTypeArg);
3580   const auto &TaskEntryFnInfo =
3581       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3582   llvm::FunctionType *TaskEntryTy =
3583       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3584   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3585   auto *TaskEntry = llvm::Function::Create(
3586       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3587   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3588   TaskEntry->setDoesNotRecurse();
3589   CodeGenFunction CGF(CGM);
3590   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3591                     Loc, Loc);
3592 
3593   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3594   // tt,
3595   // For taskloops:
3596   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3597   // tt->task_data.shareds);
3598   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3599       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3600   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3601       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3602       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3603   const auto *KmpTaskTWithPrivatesQTyRD =
3604       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3605   LValue Base =
3606       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3607   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3608   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3609   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3610   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3611 
3612   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3613   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3614   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3615       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3616       CGF.ConvertTypeForMem(SharedsPtrTy));
3617 
3618   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3619   llvm::Value *PrivatesParam;
3620   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3621     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3622     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3623         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3624   } else {
3625     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3626   }
3627 
3628   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3629                                TaskPrivatesMap,
3630                                CGF.Builder
3631                                    .CreatePointerBitCastOrAddrSpaceCast(
3632                                        TDBase.getAddress(CGF), CGF.VoidPtrTy)
3633                                    .getPointer()};
3634   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3635                                           std::end(CommonArgs));
3636   if (isOpenMPTaskLoopDirective(Kind)) {
3637     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3638     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3639     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3640     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3641     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3642     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3643     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3644     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3645     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3646     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3647     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3648     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3649     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3650     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3651     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3652     CallArgs.push_back(LBParam);
3653     CallArgs.push_back(UBParam);
3654     CallArgs.push_back(StParam);
3655     CallArgs.push_back(LIParam);
3656     CallArgs.push_back(RParam);
3657   }
3658   CallArgs.push_back(SharedsParam);
3659 
3660   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3661                                                   CallArgs);
3662   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3663                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3664   CGF.FinishFunction();
3665   return TaskEntry;
3666 }
3667 
3668 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3669                                             SourceLocation Loc,
3670                                             QualType KmpInt32Ty,
3671                                             QualType KmpTaskTWithPrivatesPtrQTy,
3672                                             QualType KmpTaskTWithPrivatesQTy) {
3673   ASTContext &C = CGM.getContext();
3674   FunctionArgList Args;
3675   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3676                             ImplicitParamDecl::Other);
3677   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3678                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3679                                 ImplicitParamDecl::Other);
3680   Args.push_back(&GtidArg);
3681   Args.push_back(&TaskTypeArg);
3682   const auto &DestructorFnInfo =
3683       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3684   llvm::FunctionType *DestructorFnTy =
3685       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3686   std::string Name =
3687       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3688   auto *DestructorFn =
3689       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3690                              Name, &CGM.getModule());
3691   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3692                                     DestructorFnInfo);
3693   DestructorFn->setDoesNotRecurse();
3694   CodeGenFunction CGF(CGM);
3695   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3696                     Args, Loc, Loc);
3697 
3698   LValue Base = CGF.EmitLoadOfPointerLValue(
3699       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3700       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3701   const auto *KmpTaskTWithPrivatesQTyRD =
3702       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3703   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3704   Base = CGF.EmitLValueForField(Base, *FI);
3705   for (const auto *Field :
3706        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3707     if (QualType::DestructionKind DtorKind =
3708             Field->getType().isDestructedType()) {
3709       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3710       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3711     }
3712   }
3713   CGF.FinishFunction();
3714   return DestructorFn;
3715 }
3716 
3717 /// Emit a privates mapping function for correct handling of private and
3718 /// firstprivate variables.
3719 /// \code
3720 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3721 /// **noalias priv1,...,  <tyn> **noalias privn) {
3722 ///   *priv1 = &.privates.priv1;
3723 ///   ...;
3724 ///   *privn = &.privates.privn;
3725 /// }
3726 /// \endcode
3727 static llvm::Value *
3728 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3729                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3730                                ArrayRef<PrivateDataTy> Privates) {
3731   ASTContext &C = CGM.getContext();
3732   FunctionArgList Args;
3733   ImplicitParamDecl TaskPrivatesArg(
3734       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3735       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3736       ImplicitParamDecl::Other);
3737   Args.push_back(&TaskPrivatesArg);
3738   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3739   unsigned Counter = 1;
3740   for (const Expr *E : Data.PrivateVars) {
3741     Args.push_back(ImplicitParamDecl::Create(
3742         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3743         C.getPointerType(C.getPointerType(E->getType()))
3744             .withConst()
3745             .withRestrict(),
3746         ImplicitParamDecl::Other));
3747     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3748     PrivateVarsPos[VD] = Counter;
3749     ++Counter;
3750   }
3751   for (const Expr *E : Data.FirstprivateVars) {
3752     Args.push_back(ImplicitParamDecl::Create(
3753         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3754         C.getPointerType(C.getPointerType(E->getType()))
3755             .withConst()
3756             .withRestrict(),
3757         ImplicitParamDecl::Other));
3758     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3759     PrivateVarsPos[VD] = Counter;
3760     ++Counter;
3761   }
3762   for (const Expr *E : Data.LastprivateVars) {
3763     Args.push_back(ImplicitParamDecl::Create(
3764         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3765         C.getPointerType(C.getPointerType(E->getType()))
3766             .withConst()
3767             .withRestrict(),
3768         ImplicitParamDecl::Other));
3769     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3770     PrivateVarsPos[VD] = Counter;
3771     ++Counter;
3772   }
3773   for (const VarDecl *VD : Data.PrivateLocals) {
3774     QualType Ty = VD->getType().getNonReferenceType();
3775     if (VD->getType()->isLValueReferenceType())
3776       Ty = C.getPointerType(Ty);
3777     if (isAllocatableDecl(VD))
3778       Ty = C.getPointerType(Ty);
3779     Args.push_back(ImplicitParamDecl::Create(
3780         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3781         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3782         ImplicitParamDecl::Other));
3783     PrivateVarsPos[VD] = Counter;
3784     ++Counter;
3785   }
3786   const auto &TaskPrivatesMapFnInfo =
3787       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3788   llvm::FunctionType *TaskPrivatesMapTy =
3789       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3790   std::string Name =
3791       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3792   auto *TaskPrivatesMap = llvm::Function::Create(
3793       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3794       &CGM.getModule());
3795   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3796                                     TaskPrivatesMapFnInfo);
3797   if (CGM.getLangOpts().Optimize) {
3798     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3799     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3800     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3801   }
3802   CodeGenFunction CGF(CGM);
3803   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3804                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3805 
3806   // *privi = &.privates.privi;
3807   LValue Base = CGF.EmitLoadOfPointerLValue(
3808       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3809       TaskPrivatesArg.getType()->castAs<PointerType>());
3810   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3811   Counter = 0;
3812   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3813     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3814     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3815     LValue RefLVal =
3816         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3817     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3818         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3819     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3820     ++Counter;
3821   }
3822   CGF.FinishFunction();
3823   return TaskPrivatesMap;
3824 }
3825 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lockstep with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD, accessed through \p TDBase)
/// and emits the initializer of each private copy into its field.
///
/// \param CGF Function the initialization code is emitted into.
/// \param D Directive whose 'task' or 'taskloop' captured statement provides
/// the capture information.
/// \param KmpTaskSharedsPtr Address of the shareds block; only used when a
/// source base is needed (see the condition guarding SrcBase below).
/// \param TDBase LValue of the task-with-privates object to initialize.
/// \param KmpTaskTWithPrivatesQTyRD Record whose second field is the privates
/// record.
/// \param SharedsTy Type of the shareds block.
/// \param SharedsPtrTy Pointer type \p KmpTaskSharedsPtr is cast to.
/// \param Data Task data; only FirstprivateVars is consulted here.
/// \param Privates Entries matching the privates record fields one to one.
/// \param ForDup If true, only non-trivial constructor initializers are
/// emitted and firstprivate sources are read from the shareds block.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // FI first designates the privates field of the task-with-privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Select the captured statement of the task or taskloop region.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    // View the shareds block as an lvalue of type SharedsTy.
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Emit the initializer unless there is none; when duplicating, only
    // non-trivial constructor calls are emitted.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // An element-init helper means the private copy is initialized from the
      // original variable, so the source lvalue has to be located first.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          // Not captured: use the address of the local variable directly.
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the source from the shareds block, using the declared
          // alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Variable captured by an enclosing lambda, or code emitted inside
          // a block: emit the original reference as is.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Redirect the element-init helper to the source element.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: redirect the element-init helper to the source
          // variable and emit the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No element-init helper: the initializer does not need the original
        // variable and is emitted directly.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    // Keep the field iterator in step with the Privates entries.
    ++FI;
  }
}
3947 
3948 /// Check if duplication function is required for taskloops.
3949 static bool checkInitIsRequired(CodeGenFunction &CGF,
3950                                 ArrayRef<PrivateDataTy> Privates) {
3951   bool InitRequired = false;
3952   for (const PrivateDataTy &Pair : Privates) {
3953     if (Pair.second.isLocalPrivate())
3954       continue;
3955     const VarDecl *VD = Pair.second.PrivateCopy;
3956     const Expr *Init = VD->getAnyInitializer();
3957     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3958                                     !CGF.isTrivialInitializer(Init));
3959     if (InitRequired)
3960       break;
3961   }
3962   return InitRequired;
3963 }
3964 
3965 
3966 /// Emit task_dup function (for initialization of
3967 /// private/firstprivate/lastprivate vars and last_iter flag)
3968 /// \code
3969 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3970 /// lastpriv) {
3971 /// // setup lastprivate flag
3972 ///    task_dst->last = lastpriv;
3973 /// // could be constructor calls here...
3974 /// }
3975 /// \endcode
3976 static llvm::Value *
3977 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3978                     const OMPExecutableDirective &D,
3979                     QualType KmpTaskTWithPrivatesPtrQTy,
3980                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3981                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3982                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3983                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3984   ASTContext &C = CGM.getContext();
3985   FunctionArgList Args;
3986   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3987                            KmpTaskTWithPrivatesPtrQTy,
3988                            ImplicitParamDecl::Other);
3989   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3990                            KmpTaskTWithPrivatesPtrQTy,
3991                            ImplicitParamDecl::Other);
3992   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3993                                 ImplicitParamDecl::Other);
3994   Args.push_back(&DstArg);
3995   Args.push_back(&SrcArg);
3996   Args.push_back(&LastprivArg);
3997   const auto &TaskDupFnInfo =
3998       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3999   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4000   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4001   auto *TaskDup = llvm::Function::Create(
4002       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4003   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4004   TaskDup->setDoesNotRecurse();
4005   CodeGenFunction CGF(CGM);
4006   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4007                     Loc);
4008 
4009   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4010       CGF.GetAddrOfLocalVar(&DstArg),
4011       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4012   // task_dst->liter = lastpriv;
4013   if (WithLastIter) {
4014     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4015     LValue Base = CGF.EmitLValueForField(
4016         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4017     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4018     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4019         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4020     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4021   }
4022 
4023   // Emit initial values for private copies (if any).
4024   assert(!Privates.empty());
4025   Address KmpTaskSharedsPtr = Address::invalid();
4026   if (!Data.FirstprivateVars.empty()) {
4027     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4028         CGF.GetAddrOfLocalVar(&SrcArg),
4029         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4030     LValue Base = CGF.EmitLValueForField(
4031         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4032     KmpTaskSharedsPtr = Address(
4033         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4034                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4035                                                   KmpTaskTShareds)),
4036                              Loc),
4037         CGM.getNaturalTypeAlignment(SharedsTy));
4038   }
4039   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4040                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4041   CGF.FinishFunction();
4042   return TaskDup;
4043 }
4044 
4045 /// Checks if destructor function is required to be generated.
4046 /// \return true if cleanups are required, false otherwise.
4047 static bool
4048 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4049                          ArrayRef<PrivateDataTy> Privates) {
4050   for (const PrivateDataTy &P : Privates) {
4051     if (P.second.isLocalPrivate())
4052       continue;
4053     QualType Ty = P.second.Original->getType().getNonReferenceType();
4054     if (Ty.isDestructedType())
4055       return true;
4056   }
4057   return false;
4058 }
4059 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// The constructor opens one loop per iterator of the given expression and the
/// destructor closes them again in reverse order, so any code emitted while
/// the object is alive is placed inside the "iter.body" block of the last
/// iterator. When constructed with a null expression the object emits nothing.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  /// Iterator expression being lowered; null means "no loops".
  const OMPIteratorExpr *E = nullptr;
  /// Per-iterator jump destinations of the "iter.cont" (loop header) blocks.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  /// Per-iterator jump destinations of the "iter.exit" blocks.
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Privatizes the iterator and counter variables of \p E and emits the loop
  /// headers for all its iterators.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // The upper bound is evaluated here, before Privatize() is called below.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      // Register fresh temporaries for the iterator variable and its counter.
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Emit the loop headers, outermost iterator first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The signedness of the comparison follows the counter's declared type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Emits the loop latches and exit blocks, innermost iterator first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // I runs from numOfIterators() down to 1; I - 1 is the iterator index.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      // IsFinished is passed as true only for the outermost loop (I == 1),
      // which is the last exit block emitted.
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4138 
4139 static std::pair<llvm::Value *, llvm::Value *>
4140 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4141   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4142   llvm::Value *Addr;
4143   if (OASE) {
4144     const Expr *Base = OASE->getBase();
4145     Addr = CGF.EmitScalarExpr(Base);
4146   } else {
4147     Addr = CGF.EmitLValue(E).getPointer(CGF);
4148   }
4149   llvm::Value *SizeVal;
4150   QualType Ty = E->getType();
4151   if (OASE) {
4152     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4153     for (const Expr *SE : OASE->getDimensions()) {
4154       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4155       Sz = CGF.EmitScalarConversion(
4156           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4157       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4158     }
4159   } else if (const auto *ASE =
4160                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4161     LValue UpAddrLVal =
4162         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4163     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4164     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4165         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4166     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4167     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4168     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4169   } else {
4170     SizeVal = CGF.getTypeSize(Ty);
4171   }
4172   return std::make_pair(Addr, SizeVal);
4173 }
4174 
4175 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4176 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4177   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4178   if (KmpTaskAffinityInfoTy.isNull()) {
4179     RecordDecl *KmpAffinityInfoRD =
4180         C.buildImplicitRecord("kmp_task_affinity_info_t");
4181     KmpAffinityInfoRD->startDefinition();
4182     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4183     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4184     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4185     KmpAffinityInfoRD->completeDefinition();
4186     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4187   }
4188 }
4189 
/// Emits the code that allocates and fills in the runtime task object for the
/// directive \p D. In order, this function:
///  - collects private/firstprivate/lastprivate/local privates and sorts them
///    by decreasing alignment;
///  - builds (or reuses) the kmp_task_t record type and the per-task
///    "kmp_task_t with privates" record;
///  - emits the privates mapping function and the proxy task entry;
///  - emits the call to __kmpc_omp_task_alloc (or
///    __kmpc_omp_target_task_alloc when a nowait clause is present);
///  - handles detach and affinity clauses;
///  - copies the shareds, initializes the privates and stores the destructors
///    thunk and the priority into the task descriptor.
/// \param CGF Function in which the code is emitted.
/// \param Loc Location used for the emitted runtime calls.
/// \param D Directive the task is created for.
/// \param TaskFunction Outlined task function; its 4th parameter type is used
/// as the type of the privates mapping function pointer.
/// \param SharedsTy Type of the record holding the shared variables.
/// \param Shareds Address of the shareds record to copy into the task.
/// \param Data Task-related clause data (privates, final, priority, ...).
/// \return The new task value, the task entry, the task pointer cast to the
/// with-privates type, the kmp_task_t lvalue, its record decl and, for
/// taskloops that need it, the task duplication function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // PrivateVars and PrivateCopies are walked in lockstep.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the element-init variable.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Task-local variables: allocatable declarations are recorded with pointer
  // alignment, all others with their own declared alignment.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Largest alignment first; stable_sort keeps the relative order of entries
  // with equal alignment.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives and all other directives each cache their own record
  // type (SavedKmpTaskloopTQTy vs. SavedKmpTaskTQTy).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Build the privates mapping function (if there are privates); otherwise use
  // a null pointer. Either way the value has the type of the 4th parameter of
  // the outlined task function.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    // The second field of the with-privates record is the privates record.
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  // Flags known at compile time are accumulated in 'Flags'; the final flag may
  // depend on a runtime condition and is merged in below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // If Data.Final carries a value, select FinalFlag or 0 at run time;
  // otherwise use its integer part as a compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a nowait clause the task is allocated through
  // __kmpc_omp_target_task_alloc, which takes the device id as an extra
  // trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    // Note: this local 'Loc' (an llvm::Value) shadows the SourceLocation
    // parameter 'Loc' for the rest of this block.
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    // Convert the returned value to the type of the event handler variable
    // and store it there.
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    // Note: 'C' is re-declared here and again as the clause variable in the
    // loops below; inside those loops 'C' names the clause, not the context.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumAffinities counts the list items of clauses without an iterator
    // modifier; NumOfElements is the runtime product of the upper bounds of
    // all iterators and stays null when no clause has an iterator.
    // NOTE(review): the iterator part multiplies upper bounds across all
    // iterator clauses and does not account for the number of list items per
    // clause - confirm this matches the number of entries written below.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    // Only BaseAddr and Len are written in this function.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: total = constant part + iterator part; the storage
      // is emitted as a variable-length array.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      // The opaque value stands for the array size expression and is bound to
      // the computed NumOfElements while the VLA is emitted.
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      // The element count is passed to the runtime call below as a 32-bit
      // value.
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time-sized case: a constant array temporary.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      // Decay to the address of the first element.
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    // 'Pos' is the compile-time index of the next free slot.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // For iterator clauses the slot index is only known at run time, so it is
    // kept in a temporary that starts at the number of slots filled above.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // The scope object opens the iterator loops here and closes them when it
      // goes out of scope at the end of this loop iteration.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(),
                                          AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // Advance the runtime slot index.
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task as a pointer to the with-privates record; its
  // first field is the kmp_task_t part (TDBase).
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  // The destination is the pointer loaded from the shareds field of the new
  // task; it stays invalid when the shareds record has no fields.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task duplication function is emitted only for taskloop directives
    // that have lastprivates or privates requiring initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  // The union declaration is taken from the type of the Data1 field and is
  // reused below to index into the Data2 field as well.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4580 
namespace {
/// Dependence kinds as encoded in the flags field of a kmp_depend_info entry.
enum RTLDependenceKindTy {
  DepIn = 0x1,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Positions of the fields inside the kmp_depend_info record.
enum RTLDependInfoFieldsTy {
  BaseAddr = 0,
  Len = 1,
  Flags = 2
};
} // namespace
4591 
4592 /// Translates internal dependency kind into the runtime kind.
4593 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4594   RTLDependenceKindTy DepKind;
4595   switch (K) {
4596   case OMPC_DEPEND_in:
4597     DepKind = DepIn;
4598     break;
4599   // Out and InOut dependencies must use the same code.
4600   case OMPC_DEPEND_out:
4601   case OMPC_DEPEND_inout:
4602     DepKind = DepInOut;
4603     break;
4604   case OMPC_DEPEND_mutexinoutset:
4605     DepKind = DepMutexInOutSet;
4606     break;
4607   case OMPC_DEPEND_source:
4608   case OMPC_DEPEND_sink:
4609   case OMPC_DEPEND_depobj:
4610   case OMPC_DEPEND_unknown:
4611     llvm_unreachable("Unknown task dependence type");
4612   }
4613   return DepKind;
4614 }
4615 
4616 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4617 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4618                            QualType &FlagsTy) {
4619   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4620   if (KmpDependInfoTy.isNull()) {
4621     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4622     KmpDependInfoRD->startDefinition();
4623     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4624     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4625     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4626     KmpDependInfoRD->completeDefinition();
4627     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4628   }
4629 }
4630 
4631 std::pair<llvm::Value *, LValue>
4632 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4633                                    SourceLocation Loc) {
4634   ASTContext &C = CGM.getContext();
4635   QualType FlagsTy;
4636   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4637   RecordDecl *KmpDependInfoRD =
4638       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4639   LValue Base = CGF.EmitLoadOfPointerLValue(
4640       DepobjLVal.getAddress(CGF),
4641       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4642   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4643   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4644           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4645   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4646                             Base.getTBAAInfo());
4647   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4648       Addr.getElementType(), Addr.getPointer(),
4649       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4650   LValue NumDepsBase = CGF.MakeAddrLValue(
4651       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4652       Base.getBaseInfo(), Base.getTBAAInfo());
4653   // NumDeps = deps[i].base_addr;
4654   LValue BaseAddrLVal = CGF.EmitLValueForField(
4655       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4656   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4657   return std::make_pair(NumDeps, Base);
4658 }
4659 
4660 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4661                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4662                            const OMPTaskDataTy::DependData &Data,
4663                            Address DependenciesArray) {
4664   CodeGenModule &CGM = CGF.CGM;
4665   ASTContext &C = CGM.getContext();
4666   QualType FlagsTy;
4667   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4668   RecordDecl *KmpDependInfoRD =
4669       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4670   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4671 
4672   OMPIteratorGeneratorScope IteratorScope(
4673       CGF, cast_or_null<OMPIteratorExpr>(
4674                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4675                                  : nullptr));
4676   for (const Expr *E : Data.DepExprs) {
4677     llvm::Value *Addr;
4678     llvm::Value *Size;
4679     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4680     LValue Base;
4681     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4682       Base = CGF.MakeAddrLValue(
4683           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4684     } else {
4685       LValue &PosLVal = *Pos.get<LValue *>();
4686       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4687       Base = CGF.MakeAddrLValue(
4688           Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
4689                                         DependenciesArray.getPointer(), Idx),
4690                   DependenciesArray.getAlignment()),
4691           KmpDependInfoTy);
4692     }
4693     // deps[i].base_addr = &<Dependencies[i].second>;
4694     LValue BaseAddrLVal = CGF.EmitLValueForField(
4695         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4696     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4697                           BaseAddrLVal);
4698     // deps[i].len = sizeof(<Dependencies[i].second>);
4699     LValue LenLVal = CGF.EmitLValueForField(
4700         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4701     CGF.EmitStoreOfScalar(Size, LenLVal);
4702     // deps[i].flags = <Dependencies[i].first>;
4703     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4704     LValue FlagsLVal = CGF.EmitLValueForField(
4705         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4706     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4707                           FlagsLVal);
4708     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4709       ++(*P);
4710     } else {
4711       LValue &PosLVal = *Pos.get<LValue *>();
4712       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4713       Idx = CGF.Builder.CreateNUWAdd(Idx,
4714                                      llvm::ConstantInt::get(Idx->getType(), 1));
4715       CGF.EmitStoreOfScalar(Idx, PosLVal);
4716     }
4717   }
4718 }
4719 
4720 static SmallVector<llvm::Value *, 4>
4721 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4722                         const OMPTaskDataTy::DependData &Data) {
4723   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4724          "Expected depobj dependecy kind.");
4725   SmallVector<llvm::Value *, 4> Sizes;
4726   SmallVector<LValue, 4> SizeLVals;
4727   ASTContext &C = CGF.getContext();
4728   QualType FlagsTy;
4729   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4730   RecordDecl *KmpDependInfoRD =
4731       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4732   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4733   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4734   {
4735     OMPIteratorGeneratorScope IteratorScope(
4736         CGF, cast_or_null<OMPIteratorExpr>(
4737                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4738                                    : nullptr));
4739     for (const Expr *E : Data.DepExprs) {
4740       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4741       LValue Base = CGF.EmitLoadOfPointerLValue(
4742           DepobjLVal.getAddress(CGF),
4743           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4744       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4745           Base.getAddress(CGF), KmpDependInfoPtrT);
4746       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4747                                 Base.getTBAAInfo());
4748       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4749           Addr.getElementType(), Addr.getPointer(),
4750           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4751       LValue NumDepsBase = CGF.MakeAddrLValue(
4752           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4753           Base.getBaseInfo(), Base.getTBAAInfo());
4754       // NumDeps = deps[i].base_addr;
4755       LValue BaseAddrLVal = CGF.EmitLValueForField(
4756           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4757       llvm::Value *NumDeps =
4758           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4759       LValue NumLVal = CGF.MakeAddrLValue(
4760           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4761           C.getUIntPtrType());
4762       CGF.InitTempAlloca(NumLVal.getAddress(CGF),
4763                          llvm::ConstantInt::get(CGF.IntPtrTy, 0));
4764       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4765       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4766       CGF.EmitStoreOfScalar(Add, NumLVal);
4767       SizeLVals.push_back(NumLVal);
4768     }
4769   }
4770   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4771     llvm::Value *Size =
4772         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4773     Sizes.push_back(Size);
4774   }
4775   return Sizes;
4776 }
4777 
4778 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4779                                LValue PosLVal,
4780                                const OMPTaskDataTy::DependData &Data,
4781                                Address DependenciesArray) {
4782   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4783          "Expected depobj dependecy kind.");
4784   ASTContext &C = CGF.getContext();
4785   QualType FlagsTy;
4786   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4787   RecordDecl *KmpDependInfoRD =
4788       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4789   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4790   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4791   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4792   {
4793     OMPIteratorGeneratorScope IteratorScope(
4794         CGF, cast_or_null<OMPIteratorExpr>(
4795                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4796                                    : nullptr));
4797     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4798       const Expr *E = Data.DepExprs[I];
4799       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4800       LValue Base = CGF.EmitLoadOfPointerLValue(
4801           DepobjLVal.getAddress(CGF),
4802           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4803       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4804           Base.getAddress(CGF), KmpDependInfoPtrT);
4805       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4806                                 Base.getTBAAInfo());
4807 
4808       // Get number of elements in a single depobj.
4809       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4810           Addr.getElementType(), Addr.getPointer(),
4811           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4812       LValue NumDepsBase = CGF.MakeAddrLValue(
4813           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4814           Base.getBaseInfo(), Base.getTBAAInfo());
4815       // NumDeps = deps[i].base_addr;
4816       LValue BaseAddrLVal = CGF.EmitLValueForField(
4817           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4818       llvm::Value *NumDeps =
4819           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4820 
4821       // memcopy dependency data.
4822       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4823           ElSize,
4824           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4825       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4826       Address DepAddr =
4827           Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(),
4828                                         DependenciesArray.getPointer(), Pos),
4829                   DependenciesArray.getAlignment());
4830       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4831 
4832       // Increase pos.
4833       // pos += size;
4834       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4835       CGF.EmitStoreOfScalar(Add, PosLVal);
4836     }
4837   }
4838 }
4839 
4840 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4841     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4842     SourceLocation Loc) {
4843   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4844         return D.DepExprs.empty();
4845       }))
4846     return std::make_pair(nullptr, Address::invalid());
4847   // Process list of dependencies.
4848   ASTContext &C = CGM.getContext();
4849   Address DependenciesArray = Address::invalid();
4850   llvm::Value *NumOfElements = nullptr;
4851   unsigned NumDependencies = std::accumulate(
4852       Dependencies.begin(), Dependencies.end(), 0,
4853       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4854         return D.DepKind == OMPC_DEPEND_depobj
4855                    ? V
4856                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4857       });
4858   QualType FlagsTy;
4859   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4860   bool HasDepobjDeps = false;
4861   bool HasRegularWithIterators = false;
4862   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4863   llvm::Value *NumOfRegularWithIterators =
4864       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4865   // Calculate number of depobj dependecies and regular deps with the iterators.
4866   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4867     if (D.DepKind == OMPC_DEPEND_depobj) {
4868       SmallVector<llvm::Value *, 4> Sizes =
4869           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4870       for (llvm::Value *Size : Sizes) {
4871         NumOfDepobjElements =
4872             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4873       }
4874       HasDepobjDeps = true;
4875       continue;
4876     }
4877     // Include number of iterations, if any.
4878     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4879       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4880         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4881         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4882         NumOfRegularWithIterators =
4883             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4884       }
4885       HasRegularWithIterators = true;
4886       continue;
4887     }
4888   }
4889 
4890   QualType KmpDependInfoArrayTy;
4891   if (HasDepobjDeps || HasRegularWithIterators) {
4892     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4893                                            /*isSigned=*/false);
4894     if (HasDepobjDeps) {
4895       NumOfElements =
4896           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4897     }
4898     if (HasRegularWithIterators) {
4899       NumOfElements =
4900           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4901     }
4902     auto *OVE = new (C) OpaqueValueExpr(
4903         Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4904         VK_PRValue);
4905     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4906                                                   RValue::get(NumOfElements));
4907     KmpDependInfoArrayTy =
4908         C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4909                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4910     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4911     // Properly emit variable-sized array.
4912     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4913                                          ImplicitParamDecl::Other);
4914     CGF.EmitVarDecl(*PD);
4915     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4916     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4917                                               /*isSigned=*/false);
4918   } else {
4919     KmpDependInfoArrayTy = C.getConstantArrayType(
4920         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4921         ArrayType::Normal, /*IndexTypeQuals=*/0);
4922     DependenciesArray =
4923         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4924     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4925     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4926                                            /*isSigned=*/false);
4927   }
4928   unsigned Pos = 0;
4929   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4930     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4931         Dependencies[I].IteratorExpr)
4932       continue;
4933     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4934                    DependenciesArray);
4935   }
4936   // Copy regular dependecies with iterators.
4937   LValue PosLVal = CGF.MakeAddrLValue(
4938       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4939   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4940   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4941     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4942         !Dependencies[I].IteratorExpr)
4943       continue;
4944     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4945                    DependenciesArray);
4946   }
4947   // Copy final depobj arrays without iterators.
4948   if (HasDepobjDeps) {
4949     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4950       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4951         continue;
4952       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4953                          DependenciesArray);
4954     }
4955   }
4956   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4957       DependenciesArray, CGF.VoidPtrTy);
4958   return std::make_pair(NumOfElements, DependenciesArray);
4959 }
4960 
4961 Address CGOpenMPRuntime::emitDepobjDependClause(
4962     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4963     SourceLocation Loc) {
4964   if (Dependencies.DepExprs.empty())
4965     return Address::invalid();
4966   // Process list of dependencies.
4967   ASTContext &C = CGM.getContext();
4968   Address DependenciesArray = Address::invalid();
4969   unsigned NumDependencies = Dependencies.DepExprs.size();
4970   QualType FlagsTy;
4971   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4972   RecordDecl *KmpDependInfoRD =
4973       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4974 
4975   llvm::Value *Size;
4976   // Define type kmp_depend_info[<Dependencies.size()>];
4977   // For depobj reserve one extra element to store the number of elements.
4978   // It is required to handle depobj(x) update(in) construct.
4979   // kmp_depend_info[<Dependencies.size()>] deps;
4980   llvm::Value *NumDepsVal;
4981   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4982   if (const auto *IE =
4983           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4984     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4985     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4986       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4987       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4988       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4989     }
4990     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4991                                     NumDepsVal);
4992     CharUnits SizeInBytes =
4993         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4994     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4995     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4996     NumDepsVal =
4997         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4998   } else {
4999     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5000         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
5001         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5002     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
5003     Size = CGM.getSize(Sz.alignTo(Align));
5004     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
5005   }
5006   // Need to allocate on the dynamic memory.
5007   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5008   // Use default allocator.
5009   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5010   llvm::Value *Args[] = {ThreadID, Size, Allocator};
5011 
5012   llvm::Value *Addr =
5013       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5014                               CGM.getModule(), OMPRTL___kmpc_alloc),
5015                           Args, ".dep.arr.addr");
5016   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5017       Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
5018   DependenciesArray = Address(Addr, Align);
5019   // Write number of elements in the first element of array for depobj.
5020   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
5021   // deps[i].base_addr = NumDependencies;
5022   LValue BaseAddrLVal = CGF.EmitLValueForField(
5023       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5024   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
5025   llvm::PointerUnion<unsigned *, LValue *> Pos;
5026   unsigned Idx = 1;
5027   LValue PosLVal;
5028   if (Dependencies.IteratorExpr) {
5029     PosLVal = CGF.MakeAddrLValue(
5030         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
5031         C.getSizeType());
5032     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
5033                           /*IsInit=*/true);
5034     Pos = &PosLVal;
5035   } else {
5036     Pos = &Idx;
5037   }
5038   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
5039   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5040       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
5041   return DependenciesArray;
5042 }
5043 
5044 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5045                                         SourceLocation Loc) {
5046   ASTContext &C = CGM.getContext();
5047   QualType FlagsTy;
5048   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5049   LValue Base = CGF.EmitLoadOfPointerLValue(
5050       DepobjLVal.getAddress(CGF),
5051       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5052   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5053   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5054       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5055   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5056       Addr.getElementType(), Addr.getPointer(),
5057       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5058   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5059                                                                CGF.VoidPtrTy);
5060   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5061   // Use default allocator.
5062   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5063   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5064 
5065   // _kmpc_free(gtid, addr, nullptr);
5066   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5067                                 CGM.getModule(), OMPRTL___kmpc_free),
5068                             Args);
5069 }
5070 
5071 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5072                                        OpenMPDependClauseKind NewDepKind,
5073                                        SourceLocation Loc) {
5074   ASTContext &C = CGM.getContext();
5075   QualType FlagsTy;
5076   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5077   RecordDecl *KmpDependInfoRD =
5078       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5079   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5080   llvm::Value *NumDeps;
5081   LValue Base;
5082   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5083 
5084   Address Begin = Base.getAddress(CGF);
5085   // Cast from pointer to array type to pointer to single element.
5086   llvm::Value *End = CGF.Builder.CreateGEP(
5087       Begin.getElementType(), Begin.getPointer(), NumDeps);
5088   // The basic structure here is a while-do loop.
5089   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5090   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5091   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5092   CGF.EmitBlock(BodyBB);
5093   llvm::PHINode *ElementPHI =
5094       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5095   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5096   Begin = Address(ElementPHI, Begin.getAlignment());
5097   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5098                             Base.getTBAAInfo());
5099   // deps[i].flags = NewDepKind;
5100   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5101   LValue FlagsLVal = CGF.EmitLValueForField(
5102       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5103   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5104                         FlagsLVal);
5105 
5106   // Shift the address forward by one element.
5107   Address ElementNext =
5108       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5109   ElementPHI->addIncoming(ElementNext.getPointer(),
5110                           CGF.Builder.GetInsertBlock());
5111   llvm::Value *IsEmpty =
5112       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5113   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5114   // Done.
5115   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5116 }
5117 
5118 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5119                                    const OMPExecutableDirective &D,
5120                                    llvm::Function *TaskFunction,
5121                                    QualType SharedsTy, Address Shareds,
5122                                    const Expr *IfCond,
5123                                    const OMPTaskDataTy &Data) {
5124   if (!CGF.HaveInsertPoint())
5125     return;
5126 
5127   TaskResultTy Result =
5128       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5129   llvm::Value *NewTask = Result.NewTask;
5130   llvm::Function *TaskEntry = Result.TaskEntry;
5131   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5132   LValue TDBase = Result.TDBase;
5133   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5134   // Process list of dependences.
5135   Address DependenciesArray = Address::invalid();
5136   llvm::Value *NumOfElements;
5137   std::tie(NumOfElements, DependenciesArray) =
5138       emitDependClause(CGF, Data.Dependences, Loc);
5139 
5140   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5141   // libcall.
5142   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5143   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5144   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5145   // list is not empty
5146   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5147   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5148   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5149   llvm::Value *DepTaskArgs[7];
5150   if (!Data.Dependences.empty()) {
5151     DepTaskArgs[0] = UpLoc;
5152     DepTaskArgs[1] = ThreadID;
5153     DepTaskArgs[2] = NewTask;
5154     DepTaskArgs[3] = NumOfElements;
5155     DepTaskArgs[4] = DependenciesArray.getPointer();
5156     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5157     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5158   }
5159   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5160                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5161     if (!Data.Tied) {
5162       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5163       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5164       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5165     }
5166     if (!Data.Dependences.empty()) {
5167       CGF.EmitRuntimeCall(
5168           OMPBuilder.getOrCreateRuntimeFunction(
5169               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5170           DepTaskArgs);
5171     } else {
5172       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5173                               CGM.getModule(), OMPRTL___kmpc_omp_task),
5174                           TaskArgs);
5175     }
5176     // Check if parent region is untied and build return for untied task;
5177     if (auto *Region =
5178             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5179       Region->emitUntiedSwitch(CGF);
5180   };
5181 
5182   llvm::Value *DepWaitTaskArgs[6];
5183   if (!Data.Dependences.empty()) {
5184     DepWaitTaskArgs[0] = UpLoc;
5185     DepWaitTaskArgs[1] = ThreadID;
5186     DepWaitTaskArgs[2] = NumOfElements;
5187     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5188     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5189     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5190   }
5191   auto &M = CGM.getModule();
5192   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5193                         TaskEntry, &Data, &DepWaitTaskArgs,
5194                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5195     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5196     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5197     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5198     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5199     // is specified.
5200     if (!Data.Dependences.empty())
5201       CGF.EmitRuntimeCall(
5202           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5203           DepWaitTaskArgs);
5204     // Call proxy_task_entry(gtid, new_task);
5205     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5206                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5207       Action.Enter(CGF);
5208       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5209       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5210                                                           OutlinedFnArgs);
5211     };
5212 
5213     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5214     // kmp_task_t *new_task);
5215     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5216     // kmp_task_t *new_task);
5217     RegionCodeGenTy RCG(CodeGen);
5218     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5219                               M, OMPRTL___kmpc_omp_task_begin_if0),
5220                           TaskArgs,
5221                           OMPBuilder.getOrCreateRuntimeFunction(
5222                               M, OMPRTL___kmpc_omp_task_complete_if0),
5223                           TaskArgs);
5224     RCG.setAction(Action);
5225     RCG(CGF);
5226   };
5227 
5228   if (IfCond) {
5229     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5230   } else {
5231     RegionCodeGenTy ThenRCG(ThenCodeGen);
5232     ThenRCG(CGF);
5233   }
5234 }
5235 
5236 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5237                                        const OMPLoopDirective &D,
5238                                        llvm::Function *TaskFunction,
5239                                        QualType SharedsTy, Address Shareds,
5240                                        const Expr *IfCond,
5241                                        const OMPTaskDataTy &Data) {
5242   if (!CGF.HaveInsertPoint())
5243     return;
5244   TaskResultTy Result =
5245       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5246   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5247   // libcall.
5248   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5249   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5250   // sched, kmp_uint64 grainsize, void *task_dup);
5251   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5252   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5253   llvm::Value *IfVal;
5254   if (IfCond) {
5255     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5256                                       /*isSigned=*/true);
5257   } else {
5258     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5259   }
5260 
5261   LValue LBLVal = CGF.EmitLValueForField(
5262       Result.TDBase,
5263       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5264   const auto *LBVar =
5265       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5266   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5267                        LBLVal.getQuals(),
5268                        /*IsInitializer=*/true);
5269   LValue UBLVal = CGF.EmitLValueForField(
5270       Result.TDBase,
5271       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5272   const auto *UBVar =
5273       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5274   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5275                        UBLVal.getQuals(),
5276                        /*IsInitializer=*/true);
5277   LValue StLVal = CGF.EmitLValueForField(
5278       Result.TDBase,
5279       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5280   const auto *StVar =
5281       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5282   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5283                        StLVal.getQuals(),
5284                        /*IsInitializer=*/true);
5285   // Store reductions address.
5286   LValue RedLVal = CGF.EmitLValueForField(
5287       Result.TDBase,
5288       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5289   if (Data.Reductions) {
5290     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5291   } else {
5292     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5293                                CGF.getContext().VoidPtrTy);
5294   }
5295   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5296   llvm::Value *TaskArgs[] = {
5297       UpLoc,
5298       ThreadID,
5299       Result.NewTask,
5300       IfVal,
5301       LBLVal.getPointer(CGF),
5302       UBLVal.getPointer(CGF),
5303       CGF.EmitLoadOfScalar(StLVal, Loc),
5304       llvm::ConstantInt::getSigned(
5305           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5306       llvm::ConstantInt::getSigned(
5307           CGF.IntTy, Data.Schedule.getPointer()
5308                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5309                          : NoSchedule),
5310       Data.Schedule.getPointer()
5311           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5312                                       /*isSigned=*/false)
5313           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5314       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5315                              Result.TaskDupFn, CGF.VoidPtrTy)
5316                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5317   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5318                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5319                       TaskArgs);
5320 }
5321 
5322 /// Emit reduction operation for each element of array (required for
5323 /// array sections) LHS op = RHS.
5324 /// \param Type Type of array.
5325 /// \param LHSVar Variable on the left side of the reduction operation
5326 /// (references element of array in original variable).
5327 /// \param RHSVar Variable on the right side of the reduction operation
5328 /// (references element of array in original variable).
5329 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5330 /// RHSVar.
5331 static void EmitOMPAggregateReduction(
5332     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5333     const VarDecl *RHSVar,
5334     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5335                                   const Expr *, const Expr *)> &RedOpGen,
5336     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5337     const Expr *UpExpr = nullptr) {
5338   // Perform element-by-element initialization.
5339   QualType ElementTy;
5340   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5341   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5342 
5343   // Drill down to the base element type on both arrays.
5344   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5345   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5346 
5347   llvm::Value *RHSBegin = RHSAddr.getPointer();
5348   llvm::Value *LHSBegin = LHSAddr.getPointer();
5349   // Cast from pointer to array type to pointer to single element.
5350   llvm::Value *LHSEnd =
5351       CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
5352   // The basic structure here is a while-do loop.
5353   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5354   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5355   llvm::Value *IsEmpty =
5356       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5357   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5358 
5359   // Enter the loop body, making that address the current address.
5360   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5361   CGF.EmitBlock(BodyBB);
5362 
5363   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5364 
5365   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5366       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5367   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5368   Address RHSElementCurrent =
5369       Address(RHSElementPHI,
5370               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5371 
5372   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5373       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5374   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5375   Address LHSElementCurrent =
5376       Address(LHSElementPHI,
5377               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5378 
5379   // Emit copy.
5380   CodeGenFunction::OMPPrivateScope Scope(CGF);
5381   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5382   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5383   Scope.Privatize();
5384   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5385   Scope.ForceCleanup();
5386 
5387   // Shift the address forward by one element.
5388   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5389       LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
5390       "omp.arraycpy.dest.element");
5391   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5392       RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
5393       "omp.arraycpy.src.element");
5394   // Check whether we've reached the end.
5395   llvm::Value *Done =
5396       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5397   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5398   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5399   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5400 
5401   // Done.
5402   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5403 }
5404 
5405 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5406 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5407 /// UDR combiner function.
5408 static void emitReductionCombiner(CodeGenFunction &CGF,
5409                                   const Expr *ReductionOp) {
5410   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5411     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5412       if (const auto *DRE =
5413               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5414         if (const auto *DRD =
5415                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5416           std::pair<llvm::Function *, llvm::Function *> Reduction =
5417               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5418           RValue Func = RValue::get(Reduction.first);
5419           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5420           CGF.EmitIgnoredExpr(ReductionOp);
5421           return;
5422         }
5423   CGF.EmitIgnoredExpr(ReductionOp);
5424 }
5425 
5426 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5427     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5428     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5429     ArrayRef<const Expr *> ReductionOps) {
5430   ASTContext &C = CGM.getContext();
5431 
5432   // void reduction_func(void *LHSArg, void *RHSArg);
5433   FunctionArgList Args;
5434   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5435                            ImplicitParamDecl::Other);
5436   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5437                            ImplicitParamDecl::Other);
5438   Args.push_back(&LHSArg);
5439   Args.push_back(&RHSArg);
5440   const auto &CGFI =
5441       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5442   std::string Name = getName({"omp", "reduction", "reduction_func"});
5443   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5444                                     llvm::GlobalValue::InternalLinkage, Name,
5445                                     &CGM.getModule());
5446   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5447   Fn->setDoesNotRecurse();
5448   CodeGenFunction CGF(CGM);
5449   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5450 
5451   // Dst = (void*[n])(LHSArg);
5452   // Src = (void*[n])(RHSArg);
5453   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5454       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5455       ArgsType), CGF.getPointerAlign());
5456   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5457       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5458       ArgsType), CGF.getPointerAlign());
5459 
5460   //  ...
5461   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5462   //  ...
5463   CodeGenFunction::OMPPrivateScope Scope(CGF);
5464   auto IPriv = Privates.begin();
5465   unsigned Idx = 0;
5466   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5467     const auto *RHSVar =
5468         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5469     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5470       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5471     });
5472     const auto *LHSVar =
5473         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5474     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5475       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5476     });
5477     QualType PrivTy = (*IPriv)->getType();
5478     if (PrivTy->isVariablyModifiedType()) {
5479       // Get array size and emit VLA type.
5480       ++Idx;
5481       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5482       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5483       const VariableArrayType *VLA =
5484           CGF.getContext().getAsVariableArrayType(PrivTy);
5485       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5486       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5487           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5488       CGF.EmitVariablyModifiedType(PrivTy);
5489     }
5490   }
5491   Scope.Privatize();
5492   IPriv = Privates.begin();
5493   auto ILHS = LHSExprs.begin();
5494   auto IRHS = RHSExprs.begin();
5495   for (const Expr *E : ReductionOps) {
5496     if ((*IPriv)->getType()->isArrayType()) {
5497       // Emit reduction for array section.
5498       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5499       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5500       EmitOMPAggregateReduction(
5501           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5502           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5503             emitReductionCombiner(CGF, E);
5504           });
5505     } else {
5506       // Emit reduction for array subscript or single variable.
5507       emitReductionCombiner(CGF, E);
5508     }
5509     ++IPriv;
5510     ++ILHS;
5511     ++IRHS;
5512   }
5513   Scope.ForceCleanup();
5514   CGF.FinishFunction();
5515   return Fn;
5516 }
5517 
5518 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5519                                                   const Expr *ReductionOp,
5520                                                   const Expr *PrivateRef,
5521                                                   const DeclRefExpr *LHS,
5522                                                   const DeclRefExpr *RHS) {
5523   if (PrivateRef->getType()->isArrayType()) {
5524     // Emit reduction for array section.
5525     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5526     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5527     EmitOMPAggregateReduction(
5528         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5529         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5530           emitReductionCombiner(CGF, ReductionOp);
5531         });
5532   } else {
5533     // Emit reduction for array subscript or single variable.
5534     emitReductionCombiner(CGF, ReductionOp);
5535   }
5536 }
5537 
/// Emit the final combination step for a set of reduction items.
///
/// With Options.SimpleReduction set, only the combiners are emitted inline.
/// Otherwise a list of pointers to the RHS items is built, an outlined
/// reduce_func is emitted, and the result of __kmpc_reduce{_nowait} selects
/// between a direct combine (case 1) and an atomic/critical combine (case 2).
///
/// \param CGF Function to emit into; nothing is emitted when it has no insert
/// point.
/// \param Loc Source location used for the runtime calls and emitted code.
/// \param Privates Expressions whose types tell whether an item is an array
/// and whether it is variably modified.
/// \param LHSExprs DeclRefExprs naming the LHS variable of each combiner.
/// \param RHSExprs DeclRefExprs naming the RHS variable of each combiner;
/// their addresses are what is stored into the reduction list.
/// \param ReductionOps Combiner expression for each item.
/// \param Options Supplies the WithNowait and SimpleReduction flags.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime involvement: just emit every combiner in place.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // Size counts list slots: one per item plus one extra per variably modified
  // item.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot index in the list; it advances twice for variably modified
  // items, so it can run ahead of the item index I.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      // NOTE: this local `Size` (an llvm::Value holding the element count)
      // shadows the slot count declared above. The count is stored in the
      // list as an integer converted to a pointer.
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  // <n> is the number of reduction items (RHSExprs.size()), while
  // sizeof(RedList) is the size of the whole list type, which also covers the
  // extra slots reserved for array sizes.
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  // Any result other than 1 or 2 goes straight to the default block, which is
  // also where both cases continue.
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Region body for case 1: the same non-atomic combiners as in the
  // SimpleReduction path.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Region body for case 2: each combiner of the form `x = <update>` goes
  // through EmitOMPAtomicSimpleUpdateExpr; every other combiner is wrapped in
  // a critical region.
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // Stays BO_Comma unless the update's RHS turns out to be a binary
      // operator below.
      BinaryOperatorKind BO = BO_Comma;
      // NOTE: the pointer declared in this condition shadows the
      // BinaryOperatorKind `BO` above for the extent of the `if`.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          // The callback stores the value it is handed into a temporary, maps
          // VD to that temporary and evaluates UpExpr against it.
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // For array sections the critical region is emitted inside the
          // element-wise loop.
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  // Only the non-nowait form attaches __kmpc_end_reduce to case 2; with
  // nowait the region is emitted without any action.
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5844 
5845 /// Generates unique name for artificial threadprivate variables.
5846 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5847 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5848                                       const Expr *Ref) {
5849   SmallString<256> Buffer;
5850   llvm::raw_svector_ostream Out(Buffer);
5851   const clang::DeclRefExpr *DE;
5852   const VarDecl *D = ::getBaseDecl(Ref, DE);
5853   if (!D)
5854     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5855   D = D->getCanonicalDecl();
5856   std::string Name = CGM.getOpenMPRuntime().getName(
5857       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5858   Out << Prefix << Name << "_"
5859       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5860   return std::string(Out.str());
5861 }
5862 
5863 /// Emits reduction initializer function:
5864 /// \code
5865 /// void @.red_init(void* %arg, void* %orig) {
5866 /// %0 = bitcast void* %arg to <type>*
5867 /// store <type> <init>, <type>* %0
5868 /// ret void
5869 /// }
5870 /// \endcode
5871 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5872                                            SourceLocation Loc,
5873                                            ReductionCodeGen &RCG, unsigned N) {
5874   ASTContext &C = CGM.getContext();
5875   QualType VoidPtrTy = C.VoidPtrTy;
5876   VoidPtrTy.addRestrict();
5877   FunctionArgList Args;
5878   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5879                           ImplicitParamDecl::Other);
5880   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5881                               ImplicitParamDecl::Other);
5882   Args.emplace_back(&Param);
5883   Args.emplace_back(&ParamOrig);
5884   const auto &FnInfo =
5885       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5886   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5887   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5888   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5889                                     Name, &CGM.getModule());
5890   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5891   Fn->setDoesNotRecurse();
5892   CodeGenFunction CGF(CGM);
5893   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5894   Address PrivateAddr = CGF.EmitLoadOfPointer(
5895       CGF.GetAddrOfLocalVar(&Param),
5896       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5897   llvm::Value *Size = nullptr;
5898   // If the size of the reduction item is non-constant, load it from global
5899   // threadprivate variable.
5900   if (RCG.getSizes(N).second) {
5901     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5902         CGF, CGM.getContext().getSizeType(),
5903         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5904     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5905                                 CGM.getContext().getSizeType(), Loc);
5906   }
5907   RCG.emitAggregateType(CGF, N, Size);
5908   LValue OrigLVal;
5909   // If initializer uses initializer from declare reduction construct, emit a
5910   // pointer to the address of the original reduction item (reuired by reduction
5911   // initializer)
5912   if (RCG.usesReductionInitializer(N)) {
5913     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5914     SharedAddr = CGF.EmitLoadOfPointer(
5915         SharedAddr,
5916         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5917     OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5918   } else {
5919     OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5920         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5921         CGM.getContext().VoidPtrTy);
5922   }
5923   // Emit the initializer:
5924   // %0 = bitcast void* %arg to <type>*
5925   // store <type> <init>, <type>* %0
5926   RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5927                          [](CodeGenFunction &) { return false; });
5928   CGF.FinishFunction();
5929   return Fn;
5930 }
5931 
5932 /// Emits reduction combiner function:
5933 /// \code
5934 /// void @.red_comb(void* %arg0, void* %arg1) {
5935 /// %lhs = bitcast void* %arg0 to <type>*
5936 /// %rhs = bitcast void* %arg1 to <type>*
5937 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5938 /// store <type> %2, <type>* %lhs
5939 /// ret void
5940 /// }
5941 /// \endcode
5942 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5943                                            SourceLocation Loc,
5944                                            ReductionCodeGen &RCG, unsigned N,
5945                                            const Expr *ReductionOp,
5946                                            const Expr *LHS, const Expr *RHS,
5947                                            const Expr *PrivateRef) {
5948   ASTContext &C = CGM.getContext();
5949   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5950   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5951   FunctionArgList Args;
5952   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5953                                C.VoidPtrTy, ImplicitParamDecl::Other);
5954   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5955                             ImplicitParamDecl::Other);
5956   Args.emplace_back(&ParamInOut);
5957   Args.emplace_back(&ParamIn);
5958   const auto &FnInfo =
5959       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5960   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5961   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5962   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5963                                     Name, &CGM.getModule());
5964   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5965   Fn->setDoesNotRecurse();
5966   CodeGenFunction CGF(CGM);
5967   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5968   llvm::Value *Size = nullptr;
5969   // If the size of the reduction item is non-constant, load it from global
5970   // threadprivate variable.
5971   if (RCG.getSizes(N).second) {
5972     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5973         CGF, CGM.getContext().getSizeType(),
5974         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5975     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5976                                 CGM.getContext().getSizeType(), Loc);
5977   }
5978   RCG.emitAggregateType(CGF, N, Size);
5979   // Remap lhs and rhs variables to the addresses of the function arguments.
5980   // %lhs = bitcast void* %arg0 to <type>*
5981   // %rhs = bitcast void* %arg1 to <type>*
5982   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5983   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
5984     // Pull out the pointer to the variable.
5985     Address PtrAddr = CGF.EmitLoadOfPointer(
5986         CGF.GetAddrOfLocalVar(&ParamInOut),
5987         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5988     return CGF.Builder.CreateElementBitCast(
5989         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5990   });
5991   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
5992     // Pull out the pointer to the variable.
5993     Address PtrAddr = CGF.EmitLoadOfPointer(
5994         CGF.GetAddrOfLocalVar(&ParamIn),
5995         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5996     return CGF.Builder.CreateElementBitCast(
5997         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
5998   });
5999   PrivateScope.Privatize();
6000   // Emit the combiner body:
6001   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6002   // store <type> %2, <type>* %lhs
6003   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6004       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6005       cast<DeclRefExpr>(RHS));
6006   CGF.FinishFunction();
6007   return Fn;
6008 }
6009 
6010 /// Emits reduction finalizer function:
6011 /// \code
6012 /// void @.red_fini(void* %arg) {
6013 /// %0 = bitcast void* %arg to <type>*
6014 /// <destroy>(<type>* %0)
6015 /// ret void
6016 /// }
6017 /// \endcode
6018 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6019                                            SourceLocation Loc,
6020                                            ReductionCodeGen &RCG, unsigned N) {
6021   if (!RCG.needCleanups(N))
6022     return nullptr;
6023   ASTContext &C = CGM.getContext();
6024   FunctionArgList Args;
6025   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6026                           ImplicitParamDecl::Other);
6027   Args.emplace_back(&Param);
6028   const auto &FnInfo =
6029       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6030   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6031   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6032   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6033                                     Name, &CGM.getModule());
6034   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6035   Fn->setDoesNotRecurse();
6036   CodeGenFunction CGF(CGM);
6037   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6038   Address PrivateAddr = CGF.EmitLoadOfPointer(
6039       CGF.GetAddrOfLocalVar(&Param),
6040       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6041   llvm::Value *Size = nullptr;
6042   // If the size of the reduction item is non-constant, load it from global
6043   // threadprivate variable.
6044   if (RCG.getSizes(N).second) {
6045     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6046         CGF, CGM.getContext().getSizeType(),
6047         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6048     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6049                                 CGM.getContext().getSizeType(), Loc);
6050   }
6051   RCG.emitAggregateType(CGF, N, Size);
6052   // Emit the finalizer body:
6053   // <destroy>(<type>* %0)
6054   RCG.emitCleanups(CGF, N, PrivateAddr);
6055   CGF.FinishFunction(Loc);
6056   return Fn;
6057 }
6058 
6059 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6060     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6061     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6062   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6063     return nullptr;
6064 
6065   // Build typedef struct:
6066   // kmp_taskred_input {
6067   //   void *reduce_shar; // shared reduction item
6068   //   void *reduce_orig; // original reduction item used for initialization
6069   //   size_t reduce_size; // size of data item
6070   //   void *reduce_init; // data initialization routine
6071   //   void *reduce_fini; // data finalization routine
6072   //   void *reduce_comb; // data combiner routine
6073   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
6074   // } kmp_taskred_input_t;
6075   ASTContext &C = CGM.getContext();
6076   RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
6077   RD->startDefinition();
6078   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6079   const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6080   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6081   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6082   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6083   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6084   const FieldDecl *FlagsFD = addFieldToRecordDecl(
6085       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6086   RD->completeDefinition();
6087   QualType RDType = C.getRecordType(RD);
6088   unsigned Size = Data.ReductionVars.size();
6089   llvm::APInt ArraySize(/*numBits=*/64, Size);
6090   QualType ArrayRDType = C.getConstantArrayType(
6091       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
6092   // kmp_task_red_input_t .rd_input.[Size];
6093   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6094   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
6095                        Data.ReductionCopies, Data.ReductionOps);
6096   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6097     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6098     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6099                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6100     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6101         TaskRedInput.getPointer(), Idxs,
6102         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6103         ".rd_input.gep.");
6104     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6105     // ElemLVal.reduce_shar = &Shareds[Cnt];
6106     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6107     RCG.emitSharedOrigLValue(CGF, Cnt);
6108     llvm::Value *CastedShared =
6109         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
6110     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6111     // ElemLVal.reduce_orig = &Origs[Cnt];
6112     LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
6113     llvm::Value *CastedOrig =
6114         CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6115     CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6116     RCG.emitAggregateType(CGF, Cnt);
6117     llvm::Value *SizeValInChars;
6118     llvm::Value *SizeVal;
6119     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6120     // We use delayed creation/initialization for VLAs and array sections. It is
6121     // required because runtime does not provide the way to pass the sizes of
6122     // VLAs/array sections to initializer/combiner/finalizer functions. Instead
6123     // threadprivate global variables are used to store these values and use
6124     // them in the functions.
6125     bool DelayedCreation = !!SizeVal;
6126     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6127                                                /*isSigned=*/false);
6128     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6129     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6130     // ElemLVal.reduce_init = init;
6131     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6132     llvm::Value *InitAddr =
6133         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6134     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6135     // ElemLVal.reduce_fini = fini;
6136     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6137     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6138     llvm::Value *FiniAddr = Fini
6139                                 ? CGF.EmitCastToVoidPtr(Fini)
6140                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6141     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6142     // ElemLVal.reduce_comb = comb;
6143     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6144     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6145         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6146         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6147     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6148     // ElemLVal.flags = 0;
6149     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6150     if (DelayedCreation) {
6151       CGF.EmitStoreOfScalar(
6152           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6153           FlagsLVal);
6154     } else
6155       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6156                                  FlagsLVal.getType());
6157   }
6158   if (Data.IsReductionWithTaskMod) {
6159     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6160     // is_ws, int num, void *data);
6161     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6162     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6163                                                   CGM.IntTy, /*isSigned=*/true);
6164     llvm::Value *Args[] = {
6165         IdentTLoc, GTid,
6166         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
6167                                /*isSigned=*/true),
6168         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6169         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6170             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6171     return CGF.EmitRuntimeCall(
6172         OMPBuilder.getOrCreateRuntimeFunction(
6173             CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6174         Args);
6175   }
6176   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6177   llvm::Value *Args[] = {
6178       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6179                                 /*isSigned=*/true),
6180       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6181       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6182                                                       CGM.VoidPtrTy)};
6183   return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6184                                  CGM.getModule(), OMPRTL___kmpc_taskred_init),
6185                              Args);
6186 }
6187 
6188 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6189                                             SourceLocation Loc,
6190                                             bool IsWorksharingReduction) {
6191   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6192   // is_ws, int num, void *data);
6193   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6194   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6195                                                 CGM.IntTy, /*isSigned=*/true);
6196   llvm::Value *Args[] = {IdentTLoc, GTid,
6197                          llvm::ConstantInt::get(CGM.IntTy,
6198                                                 IsWorksharingReduction ? 1 : 0,
6199                                                 /*isSigned=*/true)};
6200   (void)CGF.EmitRuntimeCall(
6201       OMPBuilder.getOrCreateRuntimeFunction(
6202           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6203       Args);
6204 }
6205 
6206 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6207                                               SourceLocation Loc,
6208                                               ReductionCodeGen &RCG,
6209                                               unsigned N) {
6210   auto Sizes = RCG.getSizes(N);
6211   // Emit threadprivate global variable if the type is non-constant
6212   // (Sizes.second = nullptr).
6213   if (Sizes.second) {
6214     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6215                                                      /*isSigned=*/false);
6216     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6217         CGF, CGM.getContext().getSizeType(),
6218         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6219     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6220   }
6221 }
6222 
6223 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6224                                               SourceLocation Loc,
6225                                               llvm::Value *ReductionsPtr,
6226                                               LValue SharedLVal) {
6227   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6228   // *d);
6229   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6230                                                    CGM.IntTy,
6231                                                    /*isSigned=*/true),
6232                          ReductionsPtr,
6233                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6234                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6235   return Address(
6236       CGF.EmitRuntimeCall(
6237           OMPBuilder.getOrCreateRuntimeFunction(
6238               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6239           Args),
6240       SharedLVal.getAlignment());
6241 }
6242 
6243 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6244                                        SourceLocation Loc) {
6245   if (!CGF.HaveInsertPoint())
6246     return;
6247 
6248   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6249     OMPBuilder.createTaskwait(CGF.Builder);
6250   } else {
6251     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6252     // global_tid);
6253     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6254     // Ignore return result until untied tasks are supported.
6255     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6256                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6257                         Args);
6258   }
6259 
6260   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6261     Region->emitUntiedSwitch(CGF);
6262 }
6263 
6264 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6265                                            OpenMPDirectiveKind InnerKind,
6266                                            const RegionCodeGenTy &CodeGen,
6267                                            bool HasCancel) {
6268   if (!CGF.HaveInsertPoint())
6269     return;
6270   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6271                                  InnerKind != OMPD_critical &&
6272                                      InnerKind != OMPD_master &&
6273                                      InnerKind != OMPD_masked);
6274   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6275 }
6276 
namespace {
/// Cancellation kinds; the numeric value is what gets passed as the
/// 'cncl_kind' argument of __kmpc_cancel and __kmpc_cancellationpoint.
enum RTCancelKind {
  /// Initial value; not produced for any cancel region by
  /// getCancellationKind.
  CancelNoreq = 0,
  /// Cancel region is 'parallel'.
  CancelParallel = 1,
  /// Cancel region is 'for'.
  CancelLoop = 2,
  /// Cancel region is 'sections'.
  CancelSections = 3,
  /// Cancel region is 'taskgroup'.
  CancelTaskgroup = 4
};
} // namespace
6286 
6287 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6288   RTCancelKind CancelKind = CancelNoreq;
6289   if (CancelRegion == OMPD_parallel)
6290     CancelKind = CancelParallel;
6291   else if (CancelRegion == OMPD_for)
6292     CancelKind = CancelLoop;
6293   else if (CancelRegion == OMPD_sections)
6294     CancelKind = CancelSections;
6295   else {
6296     assert(CancelRegion == OMPD_taskgroup);
6297     CancelKind = CancelTaskgroup;
6298   }
6299   return CancelKind;
6300 }
6301 
6302 void CGOpenMPRuntime::emitCancellationPointCall(
6303     CodeGenFunction &CGF, SourceLocation Loc,
6304     OpenMPDirectiveKind CancelRegion) {
6305   if (!CGF.HaveInsertPoint())
6306     return;
6307   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6308   // global_tid, kmp_int32 cncl_kind);
6309   if (auto *OMPRegionInfo =
6310           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6311     // For 'cancellation point taskgroup', the task region info may not have a
6312     // cancel. This may instead happen in another adjacent task.
6313     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6314       llvm::Value *Args[] = {
6315           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6316           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6317       // Ignore return result until untied tasks are supported.
6318       llvm::Value *Result = CGF.EmitRuntimeCall(
6319           OMPBuilder.getOrCreateRuntimeFunction(
6320               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6321           Args);
6322       // if (__kmpc_cancellationpoint()) {
6323       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6324       //   exit from construct;
6325       // }
6326       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6327       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6328       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6329       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6330       CGF.EmitBlock(ExitBB);
6331       if (CancelRegion == OMPD_parallel)
6332         emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6333       // exit from construct;
6334       CodeGenFunction::JumpDest CancelDest =
6335           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6336       CGF.EmitBranchThroughCleanup(CancelDest);
6337       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6338     }
6339   }
6340 }
6341 
6342 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6343                                      const Expr *IfCond,
6344                                      OpenMPDirectiveKind CancelRegion) {
6345   if (!CGF.HaveInsertPoint())
6346     return;
6347   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6348   // kmp_int32 cncl_kind);
6349   auto &M = CGM.getModule();
6350   if (auto *OMPRegionInfo =
6351           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6352     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6353                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6354       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6355       llvm::Value *Args[] = {
6356           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6357           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6358       // Ignore return result until untied tasks are supported.
6359       llvm::Value *Result = CGF.EmitRuntimeCall(
6360           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6361       // if (__kmpc_cancel()) {
6362       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6363       //   exit from construct;
6364       // }
6365       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6366       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6367       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6368       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6369       CGF.EmitBlock(ExitBB);
6370       if (CancelRegion == OMPD_parallel)
6371         RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6372       // exit from construct;
6373       CodeGenFunction::JumpDest CancelDest =
6374           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6375       CGF.EmitBranchThroughCleanup(CancelDest);
6376       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6377     };
6378     if (IfCond) {
6379       emitIfClause(CGF, IfCond, ThenGen,
6380                    [](CodeGenFunction &, PrePostActionTy &) {});
6381     } else {
6382       RegionCodeGenTy ThenRCG(ThenGen);
6383       ThenRCG(CGF);
6384     }
6385   }
6386 }
6387 
6388 namespace {
6389 /// Cleanup action for uses_allocators support.
6390 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6391   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6392 
6393 public:
6394   OMPUsesAllocatorsActionTy(
6395       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6396       : Allocators(Allocators) {}
6397   void Enter(CodeGenFunction &CGF) override {
6398     if (!CGF.HaveInsertPoint())
6399       return;
6400     for (const auto &AllocatorData : Allocators) {
6401       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6402           CGF, AllocatorData.first, AllocatorData.second);
6403     }
6404   }
6405   void Exit(CodeGenFunction &CGF) override {
6406     if (!CGF.HaveInsertPoint())
6407       return;
6408     for (const auto &AllocatorData : Allocators) {
6409       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6410                                                         AllocatorData.first);
6411     }
6412   }
6413 };
6414 } // namespace
6415 
6416 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6417     const OMPExecutableDirective &D, StringRef ParentName,
6418     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6419     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6420   assert(!ParentName.empty() && "Invalid target region parent name!");
6421   HasEmittedTargetRegion = true;
6422   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6423   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6424     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6425       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6426       if (!D.AllocatorTraits)
6427         continue;
6428       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6429     }
6430   }
6431   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6432   CodeGen.setAction(UsesAllocatorAction);
6433   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6434                                    IsOffloadEntry, CodeGen);
6435 }
6436 
6437 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6438                                              const Expr *Allocator,
6439                                              const Expr *AllocatorTraits) {
6440   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6441   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6442   // Use default memspace handle.
6443   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6444   llvm::Value *NumTraits = llvm::ConstantInt::get(
6445       CGF.IntTy, cast<ConstantArrayType>(
6446                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6447                      ->getSize()
6448                      .getLimitedValue());
6449   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6450   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6451       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
6452   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6453                                            AllocatorTraitsLVal.getBaseInfo(),
6454                                            AllocatorTraitsLVal.getTBAAInfo());
6455   llvm::Value *Traits =
6456       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6457 
6458   llvm::Value *AllocatorVal =
6459       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6460                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6461                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6462   // Store to allocator.
6463   CGF.EmitVarDecl(*cast<VarDecl>(
6464       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6465   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6466   AllocatorVal =
6467       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6468                                Allocator->getType(), Allocator->getExprLoc());
6469   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6470 }
6471 
6472 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6473                                              const Expr *Allocator) {
6474   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6475   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6476   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6477   llvm::Value *AllocatorVal =
6478       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6479   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6480                                           CGF.getContext().VoidPtrTy,
6481                                           Allocator->getExprLoc());
6482   (void)CGF.EmitRuntimeCall(
6483       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6484                                             OMPRTL___kmpc_destroy_allocator),
6485       {ThreadId, AllocatorVal});
6486 }
6487 
6488 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6489     const OMPExecutableDirective &D, StringRef ParentName,
6490     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6491     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6492   // Create a unique name for the entry function using the source location
6493   // information of the current target region. The name will be something like:
6494   //
6495   // __omp_offloading_DD_FFFF_PP_lBB
6496   //
6497   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6498   // mangled name of the function that encloses the target region and BB is the
6499   // line number of the target region.
6500 
6501   unsigned DeviceID;
6502   unsigned FileID;
6503   unsigned Line;
6504   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6505                            Line);
6506   SmallString<64> EntryFnName;
6507   {
6508     llvm::raw_svector_ostream OS(EntryFnName);
6509     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6510        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6511   }
6512 
6513   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6514 
6515   CodeGenFunction CGF(CGM, true);
6516   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6517   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6518 
6519   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6520 
6521   // If this target outline function is not an offload entry, we don't need to
6522   // register it.
6523   if (!IsOffloadEntry)
6524     return;
6525 
6526   // The target region ID is used by the runtime library to identify the current
6527   // target region, so it only has to be unique and not necessarily point to
6528   // anything. It could be the pointer to the outlined function that implements
6529   // the target region, but we aren't using that so that the compiler doesn't
6530   // need to keep that, and could therefore inline the host function if proven
6531   // worthwhile during optimization. In the other hand, if emitting code for the
6532   // device, the ID has to be the function address so that it can retrieved from
6533   // the offloading entry and launched by the runtime library. We also mark the
6534   // outlined function to have external linkage in case we are emitting code for
6535   // the device, because these functions will be entry points to the device.
6536 
6537   if (CGM.getLangOpts().OpenMPIsDevice) {
6538     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6539     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6540     OutlinedFn->setDSOLocal(false);
6541     if (CGM.getTriple().isAMDGCN())
6542       OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
6543   } else {
6544     std::string Name = getName({EntryFnName, "region_id"});
6545     OutlinedFnID = new llvm::GlobalVariable(
6546         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6547         llvm::GlobalValue::WeakAnyLinkage,
6548         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6549   }
6550 
6551   // Register the information for the entry associated with this target region.
6552   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6553       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6554       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6555 
6556   // Add NumTeams and ThreadLimit attributes to the outlined GPU function
6557   int32_t DefaultValTeams = -1;
6558   getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
6559   if (DefaultValTeams > 0) {
6560     OutlinedFn->addFnAttr("omp_target_num_teams",
6561                           std::to_string(DefaultValTeams));
6562   }
6563   int32_t DefaultValThreads = -1;
6564   getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
6565   if (DefaultValThreads > 0) {
6566     OutlinedFn->addFnAttr("omp_target_thread_limit",
6567                           std::to_string(DefaultValThreads));
6568   }
6569 }
6570 
6571 /// Checks if the expression is constant or does not have non-trivial function
6572 /// calls.
6573 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6574   // We can skip constant expressions.
6575   // We can skip expressions with trivial calls or simple expressions.
6576   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6577           !E->hasNonTrivialCall(Ctx)) &&
6578          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6579 }
6580 
6581 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6582                                                     const Stmt *Body) {
6583   const Stmt *Child = Body->IgnoreContainers();
6584   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6585     Child = nullptr;
6586     for (const Stmt *S : C->body()) {
6587       if (const auto *E = dyn_cast<Expr>(S)) {
6588         if (isTrivial(Ctx, E))
6589           continue;
6590       }
6591       // Some of the statements can be ignored.
6592       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6593           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6594         continue;
6595       // Analyze declarations.
6596       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6597         if (llvm::all_of(DS->decls(), [](const Decl *D) {
6598               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6599                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6600                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6601                   isa<UsingDirectiveDecl>(D) ||
6602                   isa<OMPDeclareReductionDecl>(D) ||
6603                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6604                 return true;
6605               const auto *VD = dyn_cast<VarDecl>(D);
6606               if (!VD)
6607                 return false;
6608               return VD->hasGlobalStorage() || !VD->isUsed();
6609             }))
6610           continue;
6611       }
6612       // Found multiple children - cannot get the one child only.
6613       if (Child)
6614         return nullptr;
6615       Child = S;
6616     }
6617     if (Child)
6618       Child = Child->IgnoreContainers();
6619   }
6620   return Child;
6621 }
6622 
6623 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6624     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6625     int32_t &DefaultVal) {
6626 
6627   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6628   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6629          "Expected target-based executable directive.");
6630   switch (DirectiveKind) {
6631   case OMPD_target: {
6632     const auto *CS = D.getInnermostCapturedStmt();
6633     const auto *Body =
6634         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6635     const Stmt *ChildStmt =
6636         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6637     if (const auto *NestedDir =
6638             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6639       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6640         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6641           const Expr *NumTeams =
6642               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6643           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6644             if (auto Constant =
6645                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6646               DefaultVal = Constant->getExtValue();
6647           return NumTeams;
6648         }
6649         DefaultVal = 0;
6650         return nullptr;
6651       }
6652       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6653           isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6654         DefaultVal = 1;
6655         return nullptr;
6656       }
6657       DefaultVal = 1;
6658       return nullptr;
6659     }
6660     // A value of -1 is used to check if we need to emit no teams region
6661     DefaultVal = -1;
6662     return nullptr;
6663   }
6664   case OMPD_target_teams:
6665   case OMPD_target_teams_distribute:
6666   case OMPD_target_teams_distribute_simd:
6667   case OMPD_target_teams_distribute_parallel_for:
6668   case OMPD_target_teams_distribute_parallel_for_simd: {
6669     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6670       const Expr *NumTeams =
6671           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6672       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6673         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6674           DefaultVal = Constant->getExtValue();
6675       return NumTeams;
6676     }
6677     DefaultVal = 0;
6678     return nullptr;
6679   }
6680   case OMPD_target_parallel:
6681   case OMPD_target_parallel_for:
6682   case OMPD_target_parallel_for_simd:
6683   case OMPD_target_simd:
6684     DefaultVal = 1;
6685     return nullptr;
6686   case OMPD_parallel:
6687   case OMPD_for:
6688   case OMPD_parallel_for:
6689   case OMPD_parallel_master:
6690   case OMPD_parallel_sections:
6691   case OMPD_for_simd:
6692   case OMPD_parallel_for_simd:
6693   case OMPD_cancel:
6694   case OMPD_cancellation_point:
6695   case OMPD_ordered:
6696   case OMPD_threadprivate:
6697   case OMPD_allocate:
6698   case OMPD_task:
6699   case OMPD_simd:
6700   case OMPD_tile:
6701   case OMPD_unroll:
6702   case OMPD_sections:
6703   case OMPD_section:
6704   case OMPD_single:
6705   case OMPD_master:
6706   case OMPD_critical:
6707   case OMPD_taskyield:
6708   case OMPD_barrier:
6709   case OMPD_taskwait:
6710   case OMPD_taskgroup:
6711   case OMPD_atomic:
6712   case OMPD_flush:
6713   case OMPD_depobj:
6714   case OMPD_scan:
6715   case OMPD_teams:
6716   case OMPD_target_data:
6717   case OMPD_target_exit_data:
6718   case OMPD_target_enter_data:
6719   case OMPD_distribute:
6720   case OMPD_distribute_simd:
6721   case OMPD_distribute_parallel_for:
6722   case OMPD_distribute_parallel_for_simd:
6723   case OMPD_teams_distribute:
6724   case OMPD_teams_distribute_simd:
6725   case OMPD_teams_distribute_parallel_for:
6726   case OMPD_teams_distribute_parallel_for_simd:
6727   case OMPD_target_update:
6728   case OMPD_declare_simd:
6729   case OMPD_declare_variant:
6730   case OMPD_begin_declare_variant:
6731   case OMPD_end_declare_variant:
6732   case OMPD_declare_target:
6733   case OMPD_end_declare_target:
6734   case OMPD_declare_reduction:
6735   case OMPD_declare_mapper:
6736   case OMPD_taskloop:
6737   case OMPD_taskloop_simd:
6738   case OMPD_master_taskloop:
6739   case OMPD_master_taskloop_simd:
6740   case OMPD_parallel_master_taskloop:
6741   case OMPD_parallel_master_taskloop_simd:
6742   case OMPD_requires:
6743   case OMPD_unknown:
6744     break;
6745   default:
6746     break;
6747   }
6748   llvm_unreachable("Unexpected directive kind.");
6749 }
6750 
6751 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6752     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6753   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6754          "Clauses associated with the teams directive expected to be emitted "
6755          "only for the host!");
6756   CGBuilderTy &Bld = CGF.Builder;
6757   int32_t DefaultNT = -1;
6758   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6759   if (NumTeams != nullptr) {
6760     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6761 
6762     switch (DirectiveKind) {
6763     case OMPD_target: {
6764       const auto *CS = D.getInnermostCapturedStmt();
6765       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6766       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6767       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6768                                                   /*IgnoreResultAssign*/ true);
6769       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6770                              /*isSigned=*/true);
6771     }
6772     case OMPD_target_teams:
6773     case OMPD_target_teams_distribute:
6774     case OMPD_target_teams_distribute_simd:
6775     case OMPD_target_teams_distribute_parallel_for:
6776     case OMPD_target_teams_distribute_parallel_for_simd: {
6777       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6778       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6779                                                   /*IgnoreResultAssign*/ true);
6780       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6781                              /*isSigned=*/true);
6782     }
6783     default:
6784       break;
6785     }
6786   } else if (DefaultNT == -1) {
6787     return nullptr;
6788   }
6789 
6790   return Bld.getInt32(DefaultNT);
6791 }
6792 
6793 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6794                                   llvm::Value *DefaultThreadLimitVal) {
6795   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6796       CGF.getContext(), CS->getCapturedStmt());
6797   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6798     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6799       llvm::Value *NumThreads = nullptr;
6800       llvm::Value *CondVal = nullptr;
6801       // Handle if clause. If if clause present, the number of threads is
6802       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6803       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6804         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6805         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6806         const OMPIfClause *IfClause = nullptr;
6807         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6808           if (C->getNameModifier() == OMPD_unknown ||
6809               C->getNameModifier() == OMPD_parallel) {
6810             IfClause = C;
6811             break;
6812           }
6813         }
6814         if (IfClause) {
6815           const Expr *Cond = IfClause->getCondition();
6816           bool Result;
6817           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6818             if (!Result)
6819               return CGF.Builder.getInt32(1);
6820           } else {
6821             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6822             if (const auto *PreInit =
6823                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6824               for (const auto *I : PreInit->decls()) {
6825                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6826                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6827                 } else {
6828                   CodeGenFunction::AutoVarEmission Emission =
6829                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6830                   CGF.EmitAutoVarCleanups(Emission);
6831                 }
6832               }
6833             }
6834             CondVal = CGF.EvaluateExprAsBool(Cond);
6835           }
6836         }
6837       }
6838       // Check the value of num_threads clause iff if clause was not specified
6839       // or is not evaluated to false.
6840       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6841         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6842         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6843         const auto *NumThreadsClause =
6844             Dir->getSingleClause<OMPNumThreadsClause>();
6845         CodeGenFunction::LexicalScope Scope(
6846             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6847         if (const auto *PreInit =
6848                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6849           for (const auto *I : PreInit->decls()) {
6850             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6851               CGF.EmitVarDecl(cast<VarDecl>(*I));
6852             } else {
6853               CodeGenFunction::AutoVarEmission Emission =
6854                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6855               CGF.EmitAutoVarCleanups(Emission);
6856             }
6857           }
6858         }
6859         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6860         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6861                                                /*isSigned=*/false);
6862         if (DefaultThreadLimitVal)
6863           NumThreads = CGF.Builder.CreateSelect(
6864               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6865               DefaultThreadLimitVal, NumThreads);
6866       } else {
6867         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6868                                            : CGF.Builder.getInt32(0);
6869       }
6870       // Process condition of the if clause.
6871       if (CondVal) {
6872         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6873                                               CGF.Builder.getInt32(1));
6874       }
6875       return NumThreads;
6876     }
6877     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6878       return CGF.Builder.getInt32(1);
6879     return DefaultThreadLimitVal;
6880   }
6881   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6882                                : CGF.Builder.getInt32(0);
6883 }
6884 
6885 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6886     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6887     int32_t &DefaultVal) {
6888   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6889   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6890          "Expected target-based executable directive.");
6891 
6892   switch (DirectiveKind) {
6893   case OMPD_target:
6894     // Teams have no clause thread_limit
6895     return nullptr;
6896   case OMPD_target_teams:
6897   case OMPD_target_teams_distribute:
6898     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6899       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6900       const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6901       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6902         if (auto Constant =
6903                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6904           DefaultVal = Constant->getExtValue();
6905       return ThreadLimit;
6906     }
6907     return nullptr;
6908   case OMPD_target_parallel:
6909   case OMPD_target_parallel_for:
6910   case OMPD_target_parallel_for_simd:
6911   case OMPD_target_teams_distribute_parallel_for:
6912   case OMPD_target_teams_distribute_parallel_for_simd: {
6913     Expr *ThreadLimit = nullptr;
6914     Expr *NumThreads = nullptr;
6915     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6916       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6917       ThreadLimit = ThreadLimitClause->getThreadLimit();
6918       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6919         if (auto Constant =
6920                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6921           DefaultVal = Constant->getExtValue();
6922     }
6923     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6924       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6925       NumThreads = NumThreadsClause->getNumThreads();
6926       if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6927         if (auto Constant =
6928                 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6929           if (Constant->getExtValue() < DefaultVal) {
6930             DefaultVal = Constant->getExtValue();
6931             ThreadLimit = NumThreads;
6932           }
6933         }
6934       }
6935     }
6936     return ThreadLimit;
6937   }
6938   case OMPD_target_teams_distribute_simd:
6939   case OMPD_target_simd:
6940     DefaultVal = 1;
6941     return nullptr;
6942   case OMPD_parallel:
6943   case OMPD_for:
6944   case OMPD_parallel_for:
6945   case OMPD_parallel_master:
6946   case OMPD_parallel_sections:
6947   case OMPD_for_simd:
6948   case OMPD_parallel_for_simd:
6949   case OMPD_cancel:
6950   case OMPD_cancellation_point:
6951   case OMPD_ordered:
6952   case OMPD_threadprivate:
6953   case OMPD_allocate:
6954   case OMPD_task:
6955   case OMPD_simd:
6956   case OMPD_tile:
6957   case OMPD_unroll:
6958   case OMPD_sections:
6959   case OMPD_section:
6960   case OMPD_single:
6961   case OMPD_master:
6962   case OMPD_critical:
6963   case OMPD_taskyield:
6964   case OMPD_barrier:
6965   case OMPD_taskwait:
6966   case OMPD_taskgroup:
6967   case OMPD_atomic:
6968   case OMPD_flush:
6969   case OMPD_depobj:
6970   case OMPD_scan:
6971   case OMPD_teams:
6972   case OMPD_target_data:
6973   case OMPD_target_exit_data:
6974   case OMPD_target_enter_data:
6975   case OMPD_distribute:
6976   case OMPD_distribute_simd:
6977   case OMPD_distribute_parallel_for:
6978   case OMPD_distribute_parallel_for_simd:
6979   case OMPD_teams_distribute:
6980   case OMPD_teams_distribute_simd:
6981   case OMPD_teams_distribute_parallel_for:
6982   case OMPD_teams_distribute_parallel_for_simd:
6983   case OMPD_target_update:
6984   case OMPD_declare_simd:
6985   case OMPD_declare_variant:
6986   case OMPD_begin_declare_variant:
6987   case OMPD_end_declare_variant:
6988   case OMPD_declare_target:
6989   case OMPD_end_declare_target:
6990   case OMPD_declare_reduction:
6991   case OMPD_declare_mapper:
6992   case OMPD_taskloop:
6993   case OMPD_taskloop_simd:
6994   case OMPD_master_taskloop:
6995   case OMPD_master_taskloop_simd:
6996   case OMPD_parallel_master_taskloop:
6997   case OMPD_parallel_master_taskloop_simd:
6998   case OMPD_requires:
6999   case OMPD_unknown:
7000     break;
7001   default:
7002     break;
7003   }
7004   llvm_unreachable("Unsupported directive kind.");
7005 }
7006 
7007 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
7008     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
7009   assert(!CGF.getLangOpts().OpenMPIsDevice &&
7010          "Clauses associated with the teams directive expected to be emitted "
7011          "only for the host!");
7012   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
7013   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
7014          "Expected target-based executable directive.");
7015   CGBuilderTy &Bld = CGF.Builder;
7016   llvm::Value *ThreadLimitVal = nullptr;
7017   llvm::Value *NumThreadsVal = nullptr;
7018   switch (DirectiveKind) {
7019   case OMPD_target: {
7020     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7021     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7022       return NumThreads;
7023     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7024         CGF.getContext(), CS->getCapturedStmt());
7025     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7026       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
7027         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
7028         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
7029         const auto *ThreadLimitClause =
7030             Dir->getSingleClause<OMPThreadLimitClause>();
7031         CodeGenFunction::LexicalScope Scope(
7032             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
7033         if (const auto *PreInit =
7034                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
7035           for (const auto *I : PreInit->decls()) {
7036             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
7037               CGF.EmitVarDecl(cast<VarDecl>(*I));
7038             } else {
7039               CodeGenFunction::AutoVarEmission Emission =
7040                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
7041               CGF.EmitAutoVarCleanups(Emission);
7042             }
7043           }
7044         }
7045         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7046             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7047         ThreadLimitVal =
7048             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7049       }
7050       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
7051           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
7052         CS = Dir->getInnermostCapturedStmt();
7053         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7054             CGF.getContext(), CS->getCapturedStmt());
7055         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
7056       }
7057       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
7058           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
7059         CS = Dir->getInnermostCapturedStmt();
7060         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7061           return NumThreads;
7062       }
7063       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
7064         return Bld.getInt32(1);
7065     }
7066     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7067   }
7068   case OMPD_target_teams: {
7069     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7070       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7071       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7072       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7073           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7074       ThreadLimitVal =
7075           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7076     }
7077     const CapturedStmt *CS = D.getInnermostCapturedStmt();
7078     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7079       return NumThreads;
7080     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7081         CGF.getContext(), CS->getCapturedStmt());
7082     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7083       if (Dir->getDirectiveKind() == OMPD_distribute) {
7084         CS = Dir->getInnermostCapturedStmt();
7085         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7086           return NumThreads;
7087       }
7088     }
7089     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7090   }
7091   case OMPD_target_teams_distribute:
7092     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7093       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7094       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7095       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7096           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7097       ThreadLimitVal =
7098           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7099     }
7100     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
7101   case OMPD_target_parallel:
7102   case OMPD_target_parallel_for:
7103   case OMPD_target_parallel_for_simd:
7104   case OMPD_target_teams_distribute_parallel_for:
7105   case OMPD_target_teams_distribute_parallel_for_simd: {
7106     llvm::Value *CondVal = nullptr;
7107     // Handle if clause. If if clause present, the number of threads is
7108     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
7109     if (D.hasClausesOfKind<OMPIfClause>()) {
7110       const OMPIfClause *IfClause = nullptr;
7111       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
7112         if (C->getNameModifier() == OMPD_unknown ||
7113             C->getNameModifier() == OMPD_parallel) {
7114           IfClause = C;
7115           break;
7116         }
7117       }
7118       if (IfClause) {
7119         const Expr *Cond = IfClause->getCondition();
7120         bool Result;
7121         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
7122           if (!Result)
7123             return Bld.getInt32(1);
7124         } else {
7125           CodeGenFunction::RunCleanupsScope Scope(CGF);
7126           CondVal = CGF.EvaluateExprAsBool(Cond);
7127         }
7128       }
7129     }
7130     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7131       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7132       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7133       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7134           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7135       ThreadLimitVal =
7136           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7137     }
7138     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
7139       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
7140       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
7141       llvm::Value *NumThreads = CGF.EmitScalarExpr(
7142           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
7143       NumThreadsVal =
7144           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
7145       ThreadLimitVal = ThreadLimitVal
7146                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
7147                                                                 ThreadLimitVal),
7148                                               NumThreadsVal, ThreadLimitVal)
7149                            : NumThreadsVal;
7150     }
7151     if (!ThreadLimitVal)
7152       ThreadLimitVal = Bld.getInt32(0);
7153     if (CondVal)
7154       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
7155     return ThreadLimitVal;
7156   }
7157   case OMPD_target_teams_distribute_simd:
7158   case OMPD_target_simd:
7159     return Bld.getInt32(1);
7160   case OMPD_parallel:
7161   case OMPD_for:
7162   case OMPD_parallel_for:
7163   case OMPD_parallel_master:
7164   case OMPD_parallel_sections:
7165   case OMPD_for_simd:
7166   case OMPD_parallel_for_simd:
7167   case OMPD_cancel:
7168   case OMPD_cancellation_point:
7169   case OMPD_ordered:
7170   case OMPD_threadprivate:
7171   case OMPD_allocate:
7172   case OMPD_task:
7173   case OMPD_simd:
7174   case OMPD_tile:
7175   case OMPD_unroll:
7176   case OMPD_sections:
7177   case OMPD_section:
7178   case OMPD_single:
7179   case OMPD_master:
7180   case OMPD_critical:
7181   case OMPD_taskyield:
7182   case OMPD_barrier:
7183   case OMPD_taskwait:
7184   case OMPD_taskgroup:
7185   case OMPD_atomic:
7186   case OMPD_flush:
7187   case OMPD_depobj:
7188   case OMPD_scan:
7189   case OMPD_teams:
7190   case OMPD_target_data:
7191   case OMPD_target_exit_data:
7192   case OMPD_target_enter_data:
7193   case OMPD_distribute:
7194   case OMPD_distribute_simd:
7195   case OMPD_distribute_parallel_for:
7196   case OMPD_distribute_parallel_for_simd:
7197   case OMPD_teams_distribute:
7198   case OMPD_teams_distribute_simd:
7199   case OMPD_teams_distribute_parallel_for:
7200   case OMPD_teams_distribute_parallel_for_simd:
7201   case OMPD_target_update:
7202   case OMPD_declare_simd:
7203   case OMPD_declare_variant:
7204   case OMPD_begin_declare_variant:
7205   case OMPD_end_declare_variant:
7206   case OMPD_declare_target:
7207   case OMPD_end_declare_target:
7208   case OMPD_declare_reduction:
7209   case OMPD_declare_mapper:
7210   case OMPD_taskloop:
7211   case OMPD_taskloop_simd:
7212   case OMPD_master_taskloop:
7213   case OMPD_master_taskloop_simd:
7214   case OMPD_parallel_master_taskloop:
7215   case OMPD_parallel_master_taskloop_simd:
7216   case OMPD_requires:
7217   case OMPD_unknown:
7218     break;
7219   default:
7220     break;
7221   }
7222   llvm_unreachable("Unsupported directive kind.");
7223 }
7224 
7225 namespace {
7226 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7227 
7228 // Utility to handle information from clauses associated with a given
7229 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7230 // It provides a convenient interface to obtain the information and generate
7231 // code for that information.
7232 class MappableExprsHandler {
7233 public:
7234   /// Values for bit flags used to specify the mapping type for
7235   /// offloading.
7236   enum OpenMPOffloadMappingFlags : uint64_t {
7237     /// No flags
7238     OMP_MAP_NONE = 0x0,
7239     /// Allocate memory on the device and move data from host to device.
7240     OMP_MAP_TO = 0x01,
7241     /// Allocate memory on the device and move data from device to host.
7242     OMP_MAP_FROM = 0x02,
7243     /// Always perform the requested mapping action on the element, even
7244     /// if it was already mapped before.
7245     OMP_MAP_ALWAYS = 0x04,
7246     /// Delete the element from the device environment, ignoring the
7247     /// current reference count associated with the element.
7248     OMP_MAP_DELETE = 0x08,
7249     /// The element being mapped is a pointer-pointee pair; both the
7250     /// pointer and the pointee should be mapped.
7251     OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
7253     /// passed to the target kernel as an argument.
7254     OMP_MAP_TARGET_PARAM = 0x20,
7255     /// Signal that the runtime library has to return the device pointer
7256     /// in the current position for the data being mapped. Used when we have the
7257     /// use_device_ptr or use_device_addr clause.
7258     OMP_MAP_RETURN_PARAM = 0x40,
7259     /// This flag signals that the reference being passed is a pointer to
7260     /// private data.
7261     OMP_MAP_PRIVATE = 0x80,
7262     /// Pass the element to the device by value.
7263     OMP_MAP_LITERAL = 0x100,
7264     /// Implicit map
7265     OMP_MAP_IMPLICIT = 0x200,
7266     /// Close is a hint to the runtime to allocate memory close to
7267     /// the target device.
7268     OMP_MAP_CLOSE = 0x400,
7269     /// 0x800 is reserved for compatibility with XLC.
7270     /// Produce a runtime error if the data is not already allocated.
7271     OMP_MAP_PRESENT = 0x1000,
7272     /// Signal that the runtime library should use args as an array of
7273     /// descriptor_dim pointers and use args_size as dims. Used when we have
7274     /// non-contiguous list items in target update directive
7275     OMP_MAP_NON_CONTIG = 0x100000000000,
7276     /// The 16 MSBs of the flags indicate whether the entry is member of some
7277     /// struct/class.
7278     OMP_MAP_MEMBER_OF = 0xffff000000000000,
7279     LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
7280   };
7281 
7282   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7283   static unsigned getFlagMemberOffset() {
7284     unsigned Offset = 0;
7285     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7286          Remain = Remain >> 1)
7287       Offset++;
7288     return Offset;
7289   }
7290 
7291   /// Class that holds debugging information for a data mapping to be passed to
7292   /// the runtime library.
7293   class MappingExprInfo {
7294     /// The variable declaration used for the data mapping.
7295     const ValueDecl *MapDecl = nullptr;
7296     /// The original expression used in the map clause, or null if there is
7297     /// none.
7298     const Expr *MapExpr = nullptr;
7299 
7300   public:
7301     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7302         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7303 
7304     const ValueDecl *getMapDecl() const { return MapDecl; }
7305     const Expr *getMapExpr() const { return MapExpr; }
7306   };
7307 
7308   /// Class that associates information with a base pointer to be passed to the
7309   /// runtime library.
7310   class BasePointerInfo {
7311     /// The base pointer.
7312     llvm::Value *Ptr = nullptr;
7313     /// The base declaration that refers to this device pointer, or null if
7314     /// there is none.
7315     const ValueDecl *DevPtrDecl = nullptr;
7316 
7317   public:
7318     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7319         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7320     llvm::Value *operator*() const { return Ptr; }
7321     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7322     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7323   };
7324 
7325   using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7326   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7327   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7328   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7329   using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
7330   using MapDimArrayTy = SmallVector<uint64_t, 4>;
7331   using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7332 
7333   /// This structure contains combined information generated for mappable
7334   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7335   /// mappers, and non-contiguous information.
7336   struct MapCombinedInfoTy {
7337     struct StructNonContiguousInfo {
7338       bool IsNonContiguous = false;
7339       MapDimArrayTy Dims;
7340       MapNonContiguousArrayTy Offsets;
7341       MapNonContiguousArrayTy Counts;
7342       MapNonContiguousArrayTy Strides;
7343     };
7344     MapExprsArrayTy Exprs;
7345     MapBaseValuesArrayTy BasePointers;
7346     MapValuesArrayTy Pointers;
7347     MapValuesArrayTy Sizes;
7348     MapFlagsArrayTy Types;
7349     MapMappersArrayTy Mappers;
7350     StructNonContiguousInfo NonContigInfo;
7351 
7352     /// Append arrays in \a CurInfo.
7353     void append(MapCombinedInfoTy &CurInfo) {
7354       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7355       BasePointers.append(CurInfo.BasePointers.begin(),
7356                           CurInfo.BasePointers.end());
7357       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7358       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7359       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7360       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7361       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7362                                  CurInfo.NonContigInfo.Dims.end());
7363       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7364                                     CurInfo.NonContigInfo.Offsets.end());
7365       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7366                                    CurInfo.NonContigInfo.Counts.end());
7367       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7368                                     CurInfo.NonContigInfo.Strides.end());
7369     }
7370   };
7371 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
7374   /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7375   ///                    HE(FieldIndex, Pointer)}
7376   struct StructRangeInfoTy {
7377     MapCombinedInfoTy PreliminaryMapData;
7378     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7379         0, Address::invalid()};
7380     std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7381         0, Address::invalid()};
7382     Address Base = Address::invalid();
7383     Address LB = Address::invalid();
7384     bool IsArraySection = false;
7385     bool HasCompleteRecord = false;
7386   };
7387 
7388 private:
  /// Information recorded for one component list of a mappable expression:
  /// the components themselves together with the map type, the modifiers and
  /// the flags that apply to them.
  ///
  /// The ArrayRef members are non-owning views, so the arrays they refer to
  /// must outlive this object.
  struct MapInfo {
    /// Expression components of the mapped item.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type of the entry; OMPC_MAP_unknown when default-constructed.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers attached to the entry.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Motion modifiers attached to the entry.
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// NOTE(review): presumably requests that the device pointer be returned
    /// for this entry - confirm against the users of this struct.
    bool ReturnDevicePointer = false;
    /// True if the entry is implicit.
    bool IsImplicit = false;
    /// User-defined mapper associated with the entry, or null if none.
    const ValueDecl *Mapper = nullptr;
    /// Expression associated with the entry, or null if none.
    /// NOTE(review): presumably the expression as written in the clause -
    /// confirm.
    const Expr *VarRef = nullptr;
    /// NOTE(review): presumably true when the entry originates from a
    /// use_device_addr clause - confirm.
    bool ForDeviceAddr = false;

    /// Create an empty entry with every member at its default value.
    MapInfo() = default;
    /// Create an entry from its individual pieces. \p Mapper and \p VarRef
    /// default to null and \p ForDeviceAddr defaults to false.
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7415 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression of the deferred entry.
    /// NOTE(review): presumably the expression taken from the clause -
    /// confirm against the code that creates these entries.
    const Expr *IE = nullptr;
    /// Declaration the deferred entry refers to.
    const ValueDecl *VD = nullptr;
    /// NOTE(review): presumably true for use_device_addr and false for
    /// use_device_ptr - confirm.
    bool ForDeviceAddr = false;

    /// Record a deferred entry for declaration \p VD with expression \p IE.
    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7428 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// The mapped bool is true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7449 
7450   llvm::Value *getExprTypeSize(const Expr *E) const {
7451     QualType ExprTy = E->getType().getCanonicalType();
7452 
7453     // Calculate the size for array shaping expression.
7454     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7455       llvm::Value *Size =
7456           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7457       for (const Expr *SE : OAE->getDimensions()) {
7458         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7459         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7460                                       CGF.getContext().getSizeType(),
7461                                       SE->getExprLoc());
7462         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7463       }
7464       return Size;
7465     }
7466 
7467     // Reference types are ignored for mapping purposes.
7468     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7469       ExprTy = RefTy->getPointeeType().getCanonicalType();
7470 
7471     // Given that an array section is considered a built-in type, we need to
7472     // do the calculation based on the length of the section instead of relying
7473     // on CGF.getTypeSize(E->getType()).
7474     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7475       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7476                             OAE->getBase()->IgnoreParenImpCasts())
7477                             .getCanonicalType();
7478 
7479       // If there is no length associated with the expression and lower bound is
7480       // not specified too, that means we are using the whole length of the
7481       // base.
7482       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7483           !OAE->getLowerBound())
7484         return CGF.getTypeSize(BaseTy);
7485 
7486       llvm::Value *ElemSize;
7487       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7488         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7489       } else {
7490         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7491         assert(ATy && "Expecting array type if not a pointer type.");
7492         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7493       }
7494 
7495       // If we don't have a length at this point, that is because we have an
7496       // array section with a single element.
7497       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7498         return ElemSize;
7499 
7500       if (const Expr *LenExpr = OAE->getLength()) {
7501         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7502         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7503                                              CGF.getContext().getSizeType(),
7504                                              LenExpr->getExprLoc());
7505         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7506       }
7507       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7508              OAE->getLowerBound() && "expected array_section[lb:].");
7509       // Size = sizetype - lb * elemtype;
7510       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7511       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7512       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7513                                        CGF.getContext().getSizeType(),
7514                                        OAE->getLowerBound()->getExprLoc());
7515       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7516       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7517       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7518       LengthVal = CGF.Builder.CreateSelect(
7519           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7520       return LengthVal;
7521     }
7522     return CGF.getTypeSize(ExprTy);
7523   }
7524 
7525   /// Return the corresponding bits for a given map clause modifier. Add
7526   /// a flag marking the map as a pointer if requested. Add a flag marking the
7527   /// map as the first one of a series of maps that relate to the same map
7528   /// expression.
7529   OpenMPOffloadMappingFlags getMapTypeBits(
7530       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7531       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7532       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7533     OpenMPOffloadMappingFlags Bits =
7534         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7535     switch (MapType) {
7536     case OMPC_MAP_alloc:
7537     case OMPC_MAP_release:
7538       // alloc and release is the default behavior in the runtime library,  i.e.
7539       // if we don't pass any bits alloc/release that is what the runtime is
7540       // going to do. Therefore, we don't need to signal anything for these two
7541       // type modifiers.
7542       break;
7543     case OMPC_MAP_to:
7544       Bits |= OMP_MAP_TO;
7545       break;
7546     case OMPC_MAP_from:
7547       Bits |= OMP_MAP_FROM;
7548       break;
7549     case OMPC_MAP_tofrom:
7550       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7551       break;
7552     case OMPC_MAP_delete:
7553       Bits |= OMP_MAP_DELETE;
7554       break;
7555     case OMPC_MAP_unknown:
7556       llvm_unreachable("Unexpected map type!");
7557     }
7558     if (AddPtrFlag)
7559       Bits |= OMP_MAP_PTR_AND_OBJ;
7560     if (AddIsTargetParamFlag)
7561       Bits |= OMP_MAP_TARGET_PARAM;
7562     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7563         != MapModifiers.end())
7564       Bits |= OMP_MAP_ALWAYS;
7565     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7566         != MapModifiers.end())
7567       Bits |= OMP_MAP_CLOSE;
7568     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) !=
7569             MapModifiers.end() ||
7570         llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) !=
7571             MotionModifiers.end())
7572       Bits |= OMP_MAP_PRESENT;
7573     if (IsNonContiguous)
7574       Bits |= OMP_MAP_NON_CONTIG;
7575     return Bits;
7576   }
7577 
7578   /// Return true if the provided expression is a final array section. A
7579   /// final array section, is one whose length can't be proved to be one.
7580   bool isFinalArraySectionExpression(const Expr *E) const {
7581     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7582 
7583     // It is not an array section and therefore not a unity-size one.
7584     if (!OASE)
7585       return false;
7586 
7587     // An array section with no colon always refer to a single element.
7588     if (OASE->getColonLocFirst().isInvalid())
7589       return false;
7590 
7591     const Expr *Length = OASE->getLength();
7592 
7593     // If we don't have a length we have to check if the array has size 1
7594     // for this dimension. Also, we should always expect a length if the
7595     // base type is pointer.
7596     if (!Length) {
7597       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7598                              OASE->getBase()->IgnoreParenImpCasts())
7599                              .getCanonicalType();
7600       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7601         return ATy->getSize().getSExtValue() != 1;
7602       // If we don't have a constant dimension length, we have to consider
7603       // the current section as having any size, so it is not necessarily
7604       // unitary. If it happen to be unity size, that's user fault.
7605       return true;
7606     }
7607 
7608     // Check if the length evaluates to 1.
7609     Expr::EvalResult Result;
7610     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7611       return true; // Can have more that size 1.
7612 
7613     llvm::APSInt ConstLength = Result.Val.getInt();
7614     return ConstLength.getSExtValue() != 1;
7615   }
7616 
7617   /// Generate the base pointers, section pointers, sizes, map type bits, and
7618   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7619   /// map type, map or motion modifiers, and expression components.
7620   /// \a IsFirstComponent should be set to true if the provided set of
7621   /// components is the first associated with a capture.
7622   void generateInfoForComponentList(
7623       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7624       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7625       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7626       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7627       bool IsFirstComponentList, bool IsImplicit,
7628       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7629       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7630       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7631           OverlappedElements = llvm::None) const {
7632     // The following summarizes what has to be generated for each map and the
7633     // types below. The generated information is expressed in this order:
7634     // base pointer, section pointer, size, flags
7635     // (to add to the ones that come from the map type and modifier).
7636     //
7637     // double d;
7638     // int i[100];
7639     // float *p;
7640     //
7641     // struct S1 {
7642     //   int i;
7643     //   float f[50];
7644     // }
7645     // struct S2 {
7646     //   int i;
7647     //   float f[50];
7648     //   S1 s;
7649     //   double *p;
7650     //   struct S2 *ps;
7651     //   int &ref;
7652     // }
7653     // S2 s;
7654     // S2 *ps;
7655     //
7656     // map(d)
7657     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7658     //
7659     // map(i)
7660     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7661     //
7662     // map(i[1:23])
7663     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7664     //
7665     // map(p)
7666     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7667     //
7668     // map(p[1:24])
7669     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7670     // in unified shared memory mode or for local pointers
7671     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7672     //
7673     // map(s)
7674     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7675     //
7676     // map(s.i)
7677     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7678     //
7679     // map(s.s.f)
7680     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7681     //
7682     // map(s.p)
7683     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7684     //
7685     // map(to: s.p[:22])
7686     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7687     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7688     // &(s.p), &(s.p[0]), 22*sizeof(double),
7689     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7690     // (*) alloc space for struct members, only this is a target parameter
7691     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7692     //      optimizes this entry out, same in the examples below)
7693     // (***) map the pointee (map: to)
7694     //
7695     // map(to: s.ref)
7696     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7697     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7698     // (*) alloc space for struct members, only this is a target parameter
7699     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7700     //      optimizes this entry out, same in the examples below)
7701     // (***) map the pointee (map: to)
7702     //
7703     // map(s.ps)
7704     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7705     //
7706     // map(from: s.ps->s.i)
7707     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7708     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7709     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7710     //
7711     // map(to: s.ps->ps)
7712     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7713     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7714     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7715     //
7716     // map(s.ps->ps->ps)
7717     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7718     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7719     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7720     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7721     //
7722     // map(to: s.ps->ps->s.f[:22])
7723     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7724     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7725     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7726     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7727     //
7728     // map(ps)
7729     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7730     //
7731     // map(ps->i)
7732     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7733     //
7734     // map(ps->s.f)
7735     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7736     //
7737     // map(from: ps->p)
7738     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7739     //
7740     // map(to: ps->p[:22])
7741     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7742     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7743     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7744     //
7745     // map(ps->ps)
7746     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7747     //
7748     // map(from: ps->ps->s.i)
7749     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7750     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7751     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7752     //
7753     // map(from: ps->ps->ps)
7754     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7755     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7756     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7757     //
7758     // map(ps->ps->ps->ps)
7759     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7760     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7761     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7762     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7763     //
7764     // map(to: ps->ps->ps->s.f[:22])
7765     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7766     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7767     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7768     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7769     //
7770     // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
7773     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7774     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7775     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7776     // (*) allocate contiguous space needed to fit all mapped members even if
7777     //     we allocate space for members not mapped (in this example,
7778     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7779     //     them as well because they fall between &s.f[0] and &s.p)
7780     //
7781     // map(from: s.f[:22]) map(to: ps->p[:33])
7782     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7783     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7784     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7785     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7786     // (*) the struct this entry pertains to is the 2nd element in the list of
7787     //     arguments, hence MEMBER_OF(2)
7788     //
7789     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7790     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7791     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7792     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7793     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7794     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7795     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7796     // (*) the struct this entry pertains to is the 4th element in the list
7797     //     of arguments, hence MEMBER_OF(4)
7798 
7799     // Track if the map information being generated is the first for a capture.
7800     bool IsCaptureFirstInfo = IsFirstComponentList;
7801     // When the variable is on a declare target link or in a to clause with
7802     // unified memory, a reference is needed to hold the host/device address
7803     // of the variable.
7804     bool RequiresReference = false;
7805 
7806     // Scan the components from the base to the complete expression.
7807     auto CI = Components.rbegin();
7808     auto CE = Components.rend();
7809     auto I = CI;
7810 
7811     // Track if the map information being generated is the first for a list of
7812     // components.
7813     bool IsExpressionFirstInfo = true;
7814     bool FirstPointerInComplexData = false;
7815     Address BP = Address::invalid();
7816     const Expr *AssocExpr = I->getAssociatedExpression();
7817     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7818     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7819     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7820 
7821     if (isa<MemberExpr>(AssocExpr)) {
7822       // The base is the 'this' pointer. The content of the pointer is going
7823       // to be the base of the field being mapped.
7824       BP = CGF.LoadCXXThisAddress();
7825     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7826                (OASE &&
7827                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7828       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7829     } else if (OAShE &&
7830                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7831       BP = Address(
7832           CGF.EmitScalarExpr(OAShE->getBase()),
7833           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7834     } else {
7835       // The base is the reference to the variable.
7836       // BP = &Var.
7837       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7838       if (const auto *VD =
7839               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7840         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7841                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7842           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7843               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7844                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7845             RequiresReference = true;
7846             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7847           }
7848         }
7849       }
7850 
7851       // If the variable is a pointer and is being dereferenced (i.e. is not
7852       // the last component), the base has to be the pointer itself, not its
7853       // reference. References are ignored for mapping purposes.
7854       QualType Ty =
7855           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7856       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7857         // No need to generate individual map information for the pointer, it
7858         // can be associated with the combined storage if shared memory mode is
7859         // active or the base declaration is not global variable.
7860         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7861         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7862             !VD || VD->hasLocalStorage())
7863           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7864         else
7865           FirstPointerInComplexData = true;
7866         ++I;
7867       }
7868     }
7869 
7870     // Track whether a component of the list should be marked as MEMBER_OF some
7871     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7872     // in a component list should be marked as MEMBER_OF, all subsequent entries
7873     // do not belong to the base struct. E.g.
7874     // struct S2 s;
7875     // s.ps->ps->ps->f[:]
7876     //   (1) (2) (3) (4)
7877     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7878     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7879     // is the pointee of ps(2) which is not member of struct s, so it should not
7880     // be marked as such (it is still PTR_AND_OBJ).
7881     // The variable is initialized to false so that PTR_AND_OBJ entries which
7882     // are not struct members are not considered (e.g. array of pointers to
7883     // data).
7884     bool ShouldBeMemberOf = false;
7885 
7886     // Variable keeping track of whether or not we have encountered a component
7887     // in the component list which is a member expression. Useful when we have a
7888     // pointer or a final array section, in which case it is the previous
7889     // component in the list which tells us whether we have a member expression.
7890     // E.g. X.f[:]
7891     // While processing the final array section "[:]" it is "f" which tells us
7892     // whether we are dealing with a member of a declared struct.
7893     const MemberExpr *EncounteredME = nullptr;
7894 
7895     // Track for the total number of dimension. Start from one for the dummy
7896     // dimension.
7897     uint64_t DimSize = 1;
7898 
7899     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7900     bool IsPrevMemberReference = false;
7901 
7902     for (; I != CE; ++I) {
7903       // If the current component is member of a struct (parent struct) mark it.
7904       if (!EncounteredME) {
7905         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7906         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7907         // as MEMBER_OF the parent struct.
7908         if (EncounteredME) {
7909           ShouldBeMemberOf = true;
7910           // Do not emit as complex pointer if this is actually not array-like
7911           // expression.
7912           if (FirstPointerInComplexData) {
7913             QualType Ty = std::prev(I)
7914                               ->getAssociatedDeclaration()
7915                               ->getType()
7916                               .getNonReferenceType();
7917             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7918             FirstPointerInComplexData = false;
7919           }
7920         }
7921       }
7922 
7923       auto Next = std::next(I);
7924 
7925       // We need to generate the addresses and sizes if this is the last
7926       // component, if the component is a pointer or if it is an array section
7927       // whose length can't be proved to be one. If this is a pointer, it
7928       // becomes the base address for the following components.
7929 
7930       // A final array section, is one whose length can't be proved to be one.
7931       // If the map item is non-contiguous then we don't treat any array section
7932       // as final array section.
7933       bool IsFinalArraySection =
7934           !IsNonContiguous &&
7935           isFinalArraySectionExpression(I->getAssociatedExpression());
7936 
7937       // If we have a declaration for the mapping use that, otherwise use
7938       // the base declaration of the map clause.
7939       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7940                                      ? I->getAssociatedDeclaration()
7941                                      : BaseDecl;
7942       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7943                                                : MapExpr;
7944 
7945       // Get information on whether the element is a pointer. Have to do a
7946       // special treatment for array sections given that they are built-in
7947       // types.
7948       const auto *OASE =
7949           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7950       const auto *OAShE =
7951           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7952       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7953       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7954       bool IsPointer =
7955           OAShE ||
7956           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7957                        .getCanonicalType()
7958                        ->isAnyPointerType()) ||
7959           I->getAssociatedExpression()->getType()->isAnyPointerType();
7960       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7961                                MapDecl &&
7962                                MapDecl->getType()->isLValueReferenceType();
7963       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7964 
7965       if (OASE)
7966         ++DimSize;
7967 
7968       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7969           IsFinalArraySection) {
7970         // If this is not the last component, we expect the pointer to be
7971         // associated with an array expression or member expression.
7972         assert((Next == CE ||
7973                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7974                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7975                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7976                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7977                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7978                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7979                "Unexpected expression");
7980 
7981         Address LB = Address::invalid();
7982         Address LowestElem = Address::invalid();
7983         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7984                                        const MemberExpr *E) {
7985           const Expr *BaseExpr = E->getBase();
7986           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7987           // scalar.
7988           LValue BaseLV;
7989           if (E->isArrow()) {
7990             LValueBaseInfo BaseInfo;
7991             TBAAAccessInfo TBAAInfo;
7992             Address Addr =
7993                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7994             QualType PtrTy = BaseExpr->getType()->getPointeeType();
7995             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7996           } else {
7997             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7998           }
7999           return BaseLV;
8000         };
8001         if (OAShE) {
8002           LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
8003                                     CGF.getContext().getTypeAlignInChars(
8004                                         OAShE->getBase()->getType()));
8005         } else if (IsMemberReference) {
8006           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8007           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8008           LowestElem = CGF.EmitLValueForFieldInitialization(
8009                               BaseLVal, cast<FieldDecl>(MapDecl))
8010                            .getAddress(CGF);
8011           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8012                    .getAddress(CGF);
8013         } else {
8014           LowestElem = LB =
8015               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8016                   .getAddress(CGF);
8017         }
8018 
8019         // If this component is a pointer inside the base struct then we don't
8020         // need to create any entry for it - it will be combined with the object
8021         // it is pointing to into a single PTR_AND_OBJ entry.
8022         bool IsMemberPointerOrAddr =
8023             EncounteredME &&
8024             (((IsPointer || ForDeviceAddr) &&
8025               I->getAssociatedExpression() == EncounteredME) ||
8026              (IsPrevMemberReference && !IsPointer) ||
8027              (IsMemberReference && Next != CE &&
8028               !Next->getAssociatedExpression()->getType()->isPointerType()));
8029         if (!OverlappedElements.empty() && Next == CE) {
8030           // Handle base element with the info for overlapped elements.
8031           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8032           assert(!IsPointer &&
8033                  "Unexpected base element with the pointer type.");
8034           // Mark the whole struct as the struct that requires allocation on the
8035           // device.
8036           PartialStruct.LowestElem = {0, LowestElem};
8037           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8038               I->getAssociatedExpression()->getType());
8039           Address HB = CGF.Builder.CreateConstGEP(
8040               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
8041                                                               CGF.VoidPtrTy),
8042               TypeSize.getQuantity() - 1);
8043           PartialStruct.HighestElem = {
8044               std::numeric_limits<decltype(
8045                   PartialStruct.HighestElem.first)>::max(),
8046               HB};
8047           PartialStruct.Base = BP;
8048           PartialStruct.LB = LB;
8049           assert(
8050               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8051               "Overlapped elements must be used only once for the variable.");
8052           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8053           // Emit data for non-overlapped data.
8054           OpenMPOffloadMappingFlags Flags =
8055               OMP_MAP_MEMBER_OF |
8056               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8057                              /*AddPtrFlag=*/false,
8058                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8059           llvm::Value *Size = nullptr;
8060           // Do bitcopy of all non-overlapped structure elements.
8061           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8062                    Component : OverlappedElements) {
8063             Address ComponentLB = Address::invalid();
8064             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8065                  Component) {
8066               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8067                 const auto *FD = dyn_cast<FieldDecl>(VD);
8068                 if (FD && FD->getType()->isLValueReferenceType()) {
8069                   const auto *ME =
8070                       cast<MemberExpr>(MC.getAssociatedExpression());
8071                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8072                   ComponentLB =
8073                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8074                           .getAddress(CGF);
8075                 } else {
8076                   ComponentLB =
8077                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8078                           .getAddress(CGF);
8079                 }
8080                 Size = CGF.Builder.CreatePtrDiff(
8081                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8082                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8083                 break;
8084               }
8085             }
8086             assert(Size && "Failed to determine structure size");
8087             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8088             CombinedInfo.BasePointers.push_back(BP.getPointer());
8089             CombinedInfo.Pointers.push_back(LB.getPointer());
8090             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8091                 Size, CGF.Int64Ty, /*isSigned=*/true));
8092             CombinedInfo.Types.push_back(Flags);
8093             CombinedInfo.Mappers.push_back(nullptr);
8094             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8095                                                                       : 1);
8096             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8097           }
8098           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8099           CombinedInfo.BasePointers.push_back(BP.getPointer());
8100           CombinedInfo.Pointers.push_back(LB.getPointer());
8101           Size = CGF.Builder.CreatePtrDiff(
8102               CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8103               CGF.EmitCastToVoidPtr(LB.getPointer()));
8104           CombinedInfo.Sizes.push_back(
8105               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8106           CombinedInfo.Types.push_back(Flags);
8107           CombinedInfo.Mappers.push_back(nullptr);
8108           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8109                                                                     : 1);
8110           break;
8111         }
8112         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8113         if (!IsMemberPointerOrAddr ||
8114             (Next == CE && MapType != OMPC_MAP_unknown)) {
8115           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8116           CombinedInfo.BasePointers.push_back(BP.getPointer());
8117           CombinedInfo.Pointers.push_back(LB.getPointer());
8118           CombinedInfo.Sizes.push_back(
8119               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8120           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8121                                                                     : 1);
8122 
8123           // If Mapper is valid, the last component inherits the mapper.
8124           bool HasMapper = Mapper && Next == CE;
8125           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8126 
8127           // We need to add a pointer flag for each map that comes from the
8128           // same expression except for the first one. We also need to signal
8129           // this map is the first one that relates with the current capture
8130           // (there is a set of entries for each capture).
8131           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8132               MapType, MapModifiers, MotionModifiers, IsImplicit,
8133               !IsExpressionFirstInfo || RequiresReference ||
8134                   FirstPointerInComplexData || IsMemberReference,
8135               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8136 
8137           if (!IsExpressionFirstInfo || IsMemberReference) {
8138             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8139             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8140             if (IsPointer || (IsMemberReference && Next != CE))
8141               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8142                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8143 
8144             if (ShouldBeMemberOf) {
8145               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8146               // should be later updated with the correct value of MEMBER_OF.
8147               Flags |= OMP_MAP_MEMBER_OF;
8148               // From now on, all subsequent PTR_AND_OBJ entries should not be
8149               // marked as MEMBER_OF.
8150               ShouldBeMemberOf = false;
8151             }
8152           }
8153 
8154           CombinedInfo.Types.push_back(Flags);
8155         }
8156 
8157         // If we have encountered a member expression so far, keep track of the
8158         // mapped member. If the parent is "*this", then the value declaration
8159         // is nullptr.
8160         if (EncounteredME) {
8161           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8162           unsigned FieldIndex = FD->getFieldIndex();
8163 
8164           // Update info about the lowest and highest elements for this struct
8165           if (!PartialStruct.Base.isValid()) {
8166             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8167             if (IsFinalArraySection) {
8168               Address HB =
8169                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8170                       .getAddress(CGF);
8171               PartialStruct.HighestElem = {FieldIndex, HB};
8172             } else {
8173               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8174             }
8175             PartialStruct.Base = BP;
8176             PartialStruct.LB = BP;
8177           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8178             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8179           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8180             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8181           }
8182         }
8183 
8184         // Need to emit combined struct for array sections.
8185         if (IsFinalArraySection || IsNonContiguous)
8186           PartialStruct.IsArraySection = true;
8187 
8188         // If we have a final array section, we are done with this expression.
8189         if (IsFinalArraySection)
8190           break;
8191 
8192         // The pointer becomes the base for the next element.
8193         if (Next != CE)
8194           BP = IsMemberReference ? LowestElem : LB;
8195 
8196         IsExpressionFirstInfo = false;
8197         IsCaptureFirstInfo = false;
8198         FirstPointerInComplexData = false;
8199         IsPrevMemberReference = IsMemberReference;
8200       } else if (FirstPointerInComplexData) {
8201         QualType Ty = Components.rbegin()
8202                           ->getAssociatedDeclaration()
8203                           ->getType()
8204                           .getNonReferenceType();
8205         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8206         FirstPointerInComplexData = false;
8207       }
8208     }
8209     // If ran into the whole component - allocate the space for the whole
8210     // record.
8211     if (!EncounteredME)
8212       PartialStruct.HasCompleteRecord = true;
8213 
8214     if (!IsNonContiguous)
8215       return;
8216 
8217     const ASTContext &Context = CGF.getContext();
8218 
8219     // For supporting stride in array section, we need to initialize the first
8220     // dimension size as 1, first offset as 0, and first count as 1
8221     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8222     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8223     MapValuesArrayTy CurStrides;
8224     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8225     uint64_t ElementTypeSize;
8226 
8227     // Collect Size information for each dimension and get the element size as
8228     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
8230     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8231          Components) {
8232       const Expr *AssocExpr = Component.getAssociatedExpression();
8233       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8234 
8235       if (!OASE)
8236         continue;
8237 
8238       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8239       auto *CAT = Context.getAsConstantArrayType(Ty);
8240       auto *VAT = Context.getAsVariableArrayType(Ty);
8241 
8242       // We need all the dimension size except for the last dimension.
8243       assert((VAT || CAT || &Component == &*Components.begin()) &&
8244              "Should be either ConstantArray or VariableArray if not the "
8245              "first Component");
8246 
8247       // Get element size if CurStrides is empty.
8248       if (CurStrides.empty()) {
8249         const Type *ElementType = nullptr;
8250         if (CAT)
8251           ElementType = CAT->getElementType().getTypePtr();
8252         else if (VAT)
8253           ElementType = VAT->getElementType().getTypePtr();
8254         else
8255           assert(&Component == &*Components.begin() &&
8256                  "Only expect pointer (non CAT or VAT) when this is the "
8257                  "first Component");
8258         // If ElementType is null, then it means the base is a pointer
8259         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8260         // for next iteration.
8261         if (ElementType) {
8262           // For the case that having pointer as base, we need to remove one
8263           // level of indirection.
8264           if (&Component != &*Components.begin())
8265             ElementType = ElementType->getPointeeOrArrayElementType();
8266           ElementTypeSize =
8267               Context.getTypeSizeInChars(ElementType).getQuantity();
8268           CurStrides.push_back(
8269               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8270         }
8271       }
8272       // Get dimension value except for the last dimension since we don't need
8273       // it.
8274       if (DimSizes.size() < Components.size() - 1) {
8275         if (CAT)
8276           DimSizes.push_back(llvm::ConstantInt::get(
8277               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8278         else if (VAT)
8279           DimSizes.push_back(CGF.Builder.CreateIntCast(
8280               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8281               /*IsSigned=*/false));
8282       }
8283     }
8284 
    // Skip the dummy dimension since we already have its information.
8286     auto DI = DimSizes.begin() + 1;
8287     // Product of dimension.
8288     llvm::Value *DimProd =
8289         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8290 
8291     // Collect info for non-contiguous. Notice that offset, count, and stride
8292     // are only meaningful for array-section, so we insert a null for anything
8293     // other than array-section.
8294     // Also, the size of offset, count, and stride are not the same as
8295     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8296     // count, and stride are the same as the number of non-contiguous
8297     // declaration in target update to/from clause.
8298     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8299          Components) {
8300       const Expr *AssocExpr = Component.getAssociatedExpression();
8301 
8302       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8303         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8304             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8305             /*isSigned=*/false);
8306         CurOffsets.push_back(Offset);
8307         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8308         CurStrides.push_back(CurStrides.back());
8309         continue;
8310       }
8311 
8312       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8313 
8314       if (!OASE)
8315         continue;
8316 
8317       // Offset
8318       const Expr *OffsetExpr = OASE->getLowerBound();
8319       llvm::Value *Offset = nullptr;
8320       if (!OffsetExpr) {
8321         // If offset is absent, then we just set it to zero.
8322         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8323       } else {
8324         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8325                                            CGF.Int64Ty,
8326                                            /*isSigned=*/false);
8327       }
8328       CurOffsets.push_back(Offset);
8329 
8330       // Count
8331       const Expr *CountExpr = OASE->getLength();
8332       llvm::Value *Count = nullptr;
8333       if (!CountExpr) {
8334         // In Clang, once a high dimension is an array section, we construct all
8335         // the lower dimension as array section, however, for case like
8336         // arr[0:2][2], Clang construct the inner dimension as an array section
8337         // but it actually is not in an array section form according to spec.
8338         if (!OASE->getColonLocFirst().isValid() &&
8339             !OASE->getColonLocSecond().isValid()) {
8340           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8341         } else {
8342           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8343           // When the length is absent it defaults to ⌈(size −
8344           // lower-bound)/stride⌉, where size is the size of the array
8345           // dimension.
8346           const Expr *StrideExpr = OASE->getStride();
8347           llvm::Value *Stride =
8348               StrideExpr
8349                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8350                                               CGF.Int64Ty, /*isSigned=*/false)
8351                   : nullptr;
8352           if (Stride)
8353             Count = CGF.Builder.CreateUDiv(
8354                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8355           else
8356             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8357         }
8358       } else {
8359         Count = CGF.EmitScalarExpr(CountExpr);
8360       }
8361       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8362       CurCounts.push_back(Count);
8363 
8364       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8365       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8366       //              Offset      Count     Stride
8367       //    D0          0           1         4    (int)    <- dummy dimension
8368       //    D1          0           2         8    (2 * (1) * 4)
8369       //    D2          1           2         20   (1 * (1 * 5) * 4)
8370       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8371       const Expr *StrideExpr = OASE->getStride();
8372       llvm::Value *Stride =
8373           StrideExpr
8374               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8375                                           CGF.Int64Ty, /*isSigned=*/false)
8376               : nullptr;
8377       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8378       if (Stride)
8379         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8380       else
8381         CurStrides.push_back(DimProd);
8382       if (DI != DimSizes.end())
8383         ++DI;
8384     }
8385 
8386     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8387     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8388     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8389   }
8390 
8391   /// Return the adjusted map modifiers if the declaration a capture refers to
8392   /// appears in a first-private clause. This is expected to be used only with
8393   /// directives that start with 'target'.
8394   MappableExprsHandler::OpenMPOffloadMappingFlags
8395   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8396     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8397 
8398     // A first private variable captured by reference will use only the
8399     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8400     // declaration is known as first-private in this handler.
8401     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8402       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8403         return MappableExprsHandler::OMP_MAP_TO |
8404                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8405       return MappableExprsHandler::OMP_MAP_PRIVATE |
8406              MappableExprsHandler::OMP_MAP_TO;
8407     }
8408     return MappableExprsHandler::OMP_MAP_TO |
8409            MappableExprsHandler::OMP_MAP_FROM;
8410   }
8411 
8412   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8413     // Rotate by getFlagMemberOffset() bits.
8414     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8415                                                   << getFlagMemberOffset());
8416   }
8417 
8418   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8419                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8420     // If the entry is PTR_AND_OBJ but has not been marked with the special
8421     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8422     // marked as MEMBER_OF.
8423     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8424         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8425       return;
8426 
8427     // Reset the placeholder value to prepare the flag for the assignment of the
8428     // proper MEMBER_OF value.
8429     Flags &= ~OMP_MAP_MEMBER_OF;
8430     Flags |= MemberOfFlag;
8431   }
8432 
8433   void getPlainLayout(const CXXRecordDecl *RD,
8434                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8435                       bool AsBase) const {
8436     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8437 
8438     llvm::StructType *St =
8439         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8440 
8441     unsigned NumElements = St->getNumElements();
8442     llvm::SmallVector<
8443         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8444         RecordLayout(NumElements);
8445 
8446     // Fill bases.
8447     for (const auto &I : RD->bases()) {
8448       if (I.isVirtual())
8449         continue;
8450       const auto *Base = I.getType()->getAsCXXRecordDecl();
8451       // Ignore empty bases.
8452       if (Base->isEmpty() || CGF.getContext()
8453                                  .getASTRecordLayout(Base)
8454                                  .getNonVirtualSize()
8455                                  .isZero())
8456         continue;
8457 
8458       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8459       RecordLayout[FieldIndex] = Base;
8460     }
8461     // Fill in virtual bases.
8462     for (const auto &I : RD->vbases()) {
8463       const auto *Base = I.getType()->getAsCXXRecordDecl();
8464       // Ignore empty bases.
8465       if (Base->isEmpty())
8466         continue;
8467       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8468       if (RecordLayout[FieldIndex])
8469         continue;
8470       RecordLayout[FieldIndex] = Base;
8471     }
8472     // Fill in all the fields.
8473     assert(!RD->isUnion() && "Unexpected union.");
8474     for (const auto *Field : RD->fields()) {
8475       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8476       // will fill in later.)
8477       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8478         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8479         RecordLayout[FieldIndex] = Field;
8480       }
8481     }
8482     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8483              &Data : RecordLayout) {
8484       if (Data.isNull())
8485         continue;
8486       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8487         getPlainLayout(Base, Layout, /*AsBase=*/true);
8488       else
8489         Layout.push_back(Data.get<const FieldDecl *>());
8490     }
8491   }
8492 
8493   /// Generate all the base pointers, section pointers, sizes, map types, and
8494   /// mappers for the extracted mappable expressions (all included in \a
8495   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8496   /// pair of the relevant declaration and index where it occurs is appended to
8497   /// the device pointers info array.
8498   void generateAllInfoForClauses(
8499       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8500       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8501           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8502     // We have to process the component lists that relate with the same
8503     // declaration in a single chunk so that we can generate the map flags
8504     // correctly. Therefore, we organize all lists in a map.
8505     enum MapKind { Present, Allocs, Other, Total };
8506     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8507                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8508         Info;
8509 
8510     // Helper function to fill the information map for the different supported
8511     // clauses.
8512     auto &&InfoGen =
8513         [&Info, &SkipVarSet](
8514             const ValueDecl *D, MapKind Kind,
8515             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8516             OpenMPMapClauseKind MapType,
8517             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8518             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8519             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8520             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8521           if (SkipVarSet.contains(D))
8522             return;
8523           auto It = Info.find(D);
8524           if (It == Info.end())
8525             It = Info
8526                      .insert(std::make_pair(
8527                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8528                      .first;
8529           It->second[Kind].emplace_back(
8530               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8531               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8532         };
8533 
8534     for (const auto *Cl : Clauses) {
8535       const auto *C = dyn_cast<OMPMapClause>(Cl);
8536       if (!C)
8537         continue;
8538       MapKind Kind = Other;
8539       if (!C->getMapTypeModifiers().empty() &&
8540           llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
8541             return K == OMPC_MAP_MODIFIER_present;
8542           }))
8543         Kind = Present;
8544       else if (C->getMapType() == OMPC_MAP_alloc)
8545         Kind = Allocs;
8546       const auto *EI = C->getVarRefs().begin();
8547       for (const auto L : C->component_lists()) {
8548         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8549         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8550                 C->getMapTypeModifiers(), llvm::None,
8551                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8552                 E);
8553         ++EI;
8554       }
8555     }
8556     for (const auto *Cl : Clauses) {
8557       const auto *C = dyn_cast<OMPToClause>(Cl);
8558       if (!C)
8559         continue;
8560       MapKind Kind = Other;
8561       if (!C->getMotionModifiers().empty() &&
8562           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8563             return K == OMPC_MOTION_MODIFIER_present;
8564           }))
8565         Kind = Present;
8566       const auto *EI = C->getVarRefs().begin();
8567       for (const auto L : C->component_lists()) {
8568         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8569                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8570                 C->isImplicit(), std::get<2>(L), *EI);
8571         ++EI;
8572       }
8573     }
8574     for (const auto *Cl : Clauses) {
8575       const auto *C = dyn_cast<OMPFromClause>(Cl);
8576       if (!C)
8577         continue;
8578       MapKind Kind = Other;
8579       if (!C->getMotionModifiers().empty() &&
8580           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8581             return K == OMPC_MOTION_MODIFIER_present;
8582           }))
8583         Kind = Present;
8584       const auto *EI = C->getVarRefs().begin();
8585       for (const auto L : C->component_lists()) {
8586         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8587                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8588                 C->isImplicit(), std::get<2>(L), *EI);
8589         ++EI;
8590       }
8591     }
8592 
8593     // Look at the use_device_ptr clause information and mark the existing map
8594     // entries as such. If there is no map information for an entry in the
8595     // use_device_ptr list, we create one with map type 'alloc' and zero size
8596     // section. It is the user fault if that was not mapped before. If there is
8597     // no map information and the pointer is a struct member, then we defer the
8598     // emission of that entry until the whole struct has been processed.
8599     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8600                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8601         DeferredInfo;
8602     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8603 
8604     for (const auto *Cl : Clauses) {
8605       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8606       if (!C)
8607         continue;
8608       for (const auto L : C->component_lists()) {
8609         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8610             std::get<1>(L);
8611         assert(!Components.empty() &&
8612                "Not expecting empty list of components!");
8613         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8614         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8615         const Expr *IE = Components.back().getAssociatedExpression();
8616         // If the first component is a member expression, we have to look into
8617         // 'this', which maps to null in the map of map information. Otherwise
8618         // look directly for the information.
8619         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8620 
8621         // We potentially have map information for this declaration already.
8622         // Look for the first set of components that refer to it.
8623         if (It != Info.end()) {
8624           bool Found = false;
8625           for (auto &Data : It->second) {
8626             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8627               return MI.Components.back().getAssociatedDeclaration() == VD;
8628             });
8629             // If we found a map entry, signal that the pointer has to be
8630             // returned and move on to the next declaration. Exclude cases where
8631             // the base pointer is mapped as array subscript, array section or
8632             // array shaping. The base address is passed as a pointer to base in
8633             // this case and cannot be used as a base for use_device_ptr list
8634             // item.
8635             if (CI != Data.end()) {
8636               auto PrevCI = std::next(CI->Components.rbegin());
8637               const auto *VarD = dyn_cast<VarDecl>(VD);
8638               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8639                   isa<MemberExpr>(IE) ||
8640                   !VD->getType().getNonReferenceType()->isPointerType() ||
8641                   PrevCI == CI->Components.rend() ||
8642                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8643                   VarD->hasLocalStorage()) {
8644                 CI->ReturnDevicePointer = true;
8645                 Found = true;
8646                 break;
8647               }
8648             }
8649           }
8650           if (Found)
8651             continue;
8652         }
8653 
8654         // We didn't find any match in our map information - generate a zero
8655         // size array section - if the pointer is a struct member we defer this
8656         // action until the whole struct has been processed.
8657         if (isa<MemberExpr>(IE)) {
8658           // Insert the pointer into Info to be processed by
8659           // generateInfoForComponentList. Because it is a member pointer
8660           // without a pointee, no entry will be generated for it, therefore
8661           // we need to generate one after the whole struct has been processed.
8662           // Nonetheless, generateInfoForComponentList must be called to take
8663           // the pointer into account for the calculation of the range of the
8664           // partial struct.
8665           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8666                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8667                   nullptr);
8668           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8669         } else {
8670           llvm::Value *Ptr =
8671               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8672           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8673           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8674           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8675           UseDevicePtrCombinedInfo.Sizes.push_back(
8676               llvm::Constant::getNullValue(CGF.Int64Ty));
8677           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8678           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8679         }
8680       }
8681     }
8682 
    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
8689     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8690     for (const auto *Cl : Clauses) {
8691       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8692       if (!C)
8693         continue;
8694       for (const auto L : C->component_lists()) {
8695         assert(!std::get<1>(L).empty() &&
8696                "Not expecting empty list of components!");
8697         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8698         if (!Processed.insert(VD).second)
8699           continue;
8700         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8701         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8702         // If the first component is a member expression, we have to look into
8703         // 'this', which maps to null in the map of map information. Otherwise
8704         // look directly for the information.
8705         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8706 
8707         // We potentially have map information for this declaration already.
8708         // Look for the first set of components that refer to it.
8709         if (It != Info.end()) {
8710           bool Found = false;
8711           for (auto &Data : It->second) {
8712             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8713               return MI.Components.back().getAssociatedDeclaration() == VD;
8714             });
8715             // If we found a map entry, signal that the pointer has to be
8716             // returned and move on to the next declaration.
8717             if (CI != Data.end()) {
8718               CI->ReturnDevicePointer = true;
8719               Found = true;
8720               break;
8721             }
8722           }
8723           if (Found)
8724             continue;
8725         }
8726 
8727         // We didn't find any match in our map information - generate a zero
8728         // size array section - if the pointer is a struct member we defer this
8729         // action until the whole struct has been processed.
8730         if (isa<MemberExpr>(IE)) {
8731           // Insert the pointer into Info to be processed by
8732           // generateInfoForComponentList. Because it is a member pointer
8733           // without a pointee, no entry will be generated for it, therefore
8734           // we need to generate one after the whole struct has been processed.
8735           // Nonetheless, generateInfoForComponentList must be called to take
8736           // the pointer into account for the calculation of the range of the
8737           // partial struct.
8738           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8739                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8740                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8741           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8742         } else {
8743           llvm::Value *Ptr;
8744           if (IE->isGLValue())
8745             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8746           else
8747             Ptr = CGF.EmitScalarExpr(IE);
8748           CombinedInfo.Exprs.push_back(VD);
8749           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8750           CombinedInfo.Pointers.push_back(Ptr);
8751           CombinedInfo.Sizes.push_back(
8752               llvm::Constant::getNullValue(CGF.Int64Ty));
8753           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8754           CombinedInfo.Mappers.push_back(nullptr);
8755         }
8756       }
8757     }
8758 
8759     for (const auto &Data : Info) {
8760       StructRangeInfoTy PartialStruct;
8761       // Temporary generated information.
8762       MapCombinedInfoTy CurInfo;
8763       const Decl *D = Data.first;
8764       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8765       for (const auto &M : Data.second) {
8766         for (const MapInfo &L : M) {
8767           assert(!L.Components.empty() &&
8768                  "Not expecting declaration with no component lists.");
8769 
8770           // Remember the current base pointer index.
8771           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8772           CurInfo.NonContigInfo.IsNonContiguous =
8773               L.Components.back().isNonContiguous();
8774           generateInfoForComponentList(
8775               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8776               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8777               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8778 
8779           // If this entry relates with a device pointer, set the relevant
8780           // declaration and add the 'return pointer' flag.
8781           if (L.ReturnDevicePointer) {
8782             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8783                    "Unexpected number of mapped base pointers.");
8784 
8785             const ValueDecl *RelevantVD =
8786                 L.Components.back().getAssociatedDeclaration();
8787             assert(RelevantVD &&
8788                    "No relevant declaration related with device pointer??");
8789 
8790             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8791                 RelevantVD);
8792             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8793           }
8794         }
8795       }
8796 
8797       // Append any pending zero-length pointers which are struct members and
8798       // used with use_device_ptr or use_device_addr.
8799       auto CI = DeferredInfo.find(Data.first);
8800       if (CI != DeferredInfo.end()) {
8801         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8802           llvm::Value *BasePtr;
8803           llvm::Value *Ptr;
8804           if (L.ForDeviceAddr) {
8805             if (L.IE->isGLValue())
8806               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8807             else
8808               Ptr = this->CGF.EmitScalarExpr(L.IE);
8809             BasePtr = Ptr;
8810             // Entry is RETURN_PARAM. Also, set the placeholder value
8811             // MEMBER_OF=FFFF so that the entry is later updated with the
8812             // correct value of MEMBER_OF.
8813             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8814           } else {
8815             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8816             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8817                                              L.IE->getExprLoc());
8818             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8819             // placeholder value MEMBER_OF=FFFF so that the entry is later
8820             // updated with the correct value of MEMBER_OF.
8821             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8822                                     OMP_MAP_MEMBER_OF);
8823           }
8824           CurInfo.Exprs.push_back(L.VD);
8825           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8826           CurInfo.Pointers.push_back(Ptr);
8827           CurInfo.Sizes.push_back(
8828               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8829           CurInfo.Mappers.push_back(nullptr);
8830         }
8831       }
8832       // If there is an entry in PartialStruct it means we have a struct with
8833       // individual members mapped. Emit an extra combined entry.
8834       if (PartialStruct.Base.isValid()) {
8835         CurInfo.NonContigInfo.Dims.push_back(0);
8836         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8837       }
8838 
8839       // We need to append the results of this capture to what we already
8840       // have.
8841       CombinedInfo.append(CurInfo);
8842     }
8843     // Append data for use_device_ptr clauses.
8844     CombinedInfo.append(UseDevicePtrCombinedInfo);
8845   }
8846 
8847 public:
8848   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8849       : CurDir(&Dir), CGF(CGF) {
8850     // Extract firstprivate clause information.
8851     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8852       for (const auto *D : C->varlists())
8853         FirstPrivateDecls.try_emplace(
8854             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8855     // Extract implicit firstprivates from uses_allocators clauses.
8856     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8857       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8858         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8859         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8860           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8861                                         /*Implicit=*/true);
8862         else if (const auto *VD = dyn_cast<VarDecl>(
8863                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8864                          ->getDecl()))
8865           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8866       }
8867     }
8868     // Extract device pointer clause information.
8869     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8870       for (auto L : C->component_lists())
8871         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8872   }
8873 
  /// Constructor for the declare mapper directive.
  ///
  /// Binds the handler to the user-defined mapper declaration \a Dir and to
  /// the code generation context \a CGF. The body is empty: no clause
  /// information is extracted at construction time.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8877 
8878   /// Generate code for the combined entry if we have a partially mapped struct
8879   /// and take care of the mapping flags of the arguments corresponding to
8880   /// individual struct members.
8881   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8882                          MapFlagsArrayTy &CurTypes,
8883                          const StructRangeInfoTy &PartialStruct,
8884                          const ValueDecl *VD = nullptr,
8885                          bool NotTargetParams = true) const {
8886     if (CurTypes.size() == 1 &&
8887         ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8888         !PartialStruct.IsArraySection)
8889       return;
8890     Address LBAddr = PartialStruct.LowestElem.second;
8891     Address HBAddr = PartialStruct.HighestElem.second;
8892     if (PartialStruct.HasCompleteRecord) {
8893       LBAddr = PartialStruct.LB;
8894       HBAddr = PartialStruct.LB;
8895     }
8896     CombinedInfo.Exprs.push_back(VD);
8897     // Base is the base of the struct
8898     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8899     // Pointer is the address of the lowest element
8900     llvm::Value *LB = LBAddr.getPointer();
8901     CombinedInfo.Pointers.push_back(LB);
8902     // There should not be a mapper for a combined entry.
8903     CombinedInfo.Mappers.push_back(nullptr);
8904     // Size is (addr of {highest+1} element) - (addr of lowest element)
8905     llvm::Value *HB = HBAddr.getPointer();
8906     llvm::Value *HAddr =
8907         CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
8908     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8909     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8910     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8911     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8912                                                   /*isSigned=*/false);
8913     CombinedInfo.Sizes.push_back(Size);
8914     // Map type is always TARGET_PARAM, if generate info for captures.
8915     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8916                                                  : OMP_MAP_TARGET_PARAM);
8917     // If any element has the present modifier, then make sure the runtime
8918     // doesn't attempt to allocate the struct.
8919     if (CurTypes.end() !=
8920         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8921           return Type & OMP_MAP_PRESENT;
8922         }))
8923       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8924     // Remove TARGET_PARAM flag from the first element
8925     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8926 
8927     // All other current entries will be MEMBER_OF the combined entry
8928     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8929     // 0xFFFF in the MEMBER_OF field).
8930     OpenMPOffloadMappingFlags MemberOfFlag =
8931         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8932     for (auto &M : CurTypes)
8933       setCorrectMemberOfFlag(M, MemberOfFlag);
8934   }
8935 
8936   /// Generate all the base pointers, section pointers, sizes, map types, and
8937   /// mappers for the extracted mappable expressions (all included in \a
8938   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8939   /// pair of the relevant declaration and index where it occurs is appended to
8940   /// the device pointers info array.
8941   void generateAllInfo(
8942       MapCombinedInfoTy &CombinedInfo,
8943       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8944           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8945     assert(CurDir.is<const OMPExecutableDirective *>() &&
8946            "Expect a executable directive");
8947     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8948     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8949   }
8950 
8951   /// Generate all the base pointers, section pointers, sizes, map types, and
8952   /// mappers for the extracted map clauses of user-defined mapper (all included
8953   /// in \a CombinedInfo).
8954   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8955     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8956            "Expect a declare mapper directive");
8957     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8958     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8959   }
8960 
8961   /// Emit capture info for lambdas for variables captured by reference.
8962   void generateInfoForLambdaCaptures(
8963       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8964       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8965     const auto *RD = VD->getType()
8966                          .getCanonicalType()
8967                          .getNonReferenceType()
8968                          ->getAsCXXRecordDecl();
8969     if (!RD || !RD->isLambda())
8970       return;
8971     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8972     LValue VDLVal = CGF.MakeAddrLValue(
8973         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8974     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8975     FieldDecl *ThisCapture = nullptr;
8976     RD->getCaptureFields(Captures, ThisCapture);
8977     if (ThisCapture) {
8978       LValue ThisLVal =
8979           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8980       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8981       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8982                                  VDLVal.getPointer(CGF));
8983       CombinedInfo.Exprs.push_back(VD);
8984       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8985       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8986       CombinedInfo.Sizes.push_back(
8987           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8988                                     CGF.Int64Ty, /*isSigned=*/true));
8989       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8990                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8991       CombinedInfo.Mappers.push_back(nullptr);
8992     }
8993     for (const LambdaCapture &LC : RD->captures()) {
8994       if (!LC.capturesVariable())
8995         continue;
8996       const VarDecl *VD = LC.getCapturedVar();
8997       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8998         continue;
8999       auto It = Captures.find(VD);
9000       assert(It != Captures.end() && "Found lambda capture without field.");
9001       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9002       if (LC.getCaptureKind() == LCK_ByRef) {
9003         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9004         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9005                                    VDLVal.getPointer(CGF));
9006         CombinedInfo.Exprs.push_back(VD);
9007         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9008         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9009         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9010             CGF.getTypeSize(
9011                 VD->getType().getCanonicalType().getNonReferenceType()),
9012             CGF.Int64Ty, /*isSigned=*/true));
9013       } else {
9014         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9015         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9016                                    VDLVal.getPointer(CGF));
9017         CombinedInfo.Exprs.push_back(VD);
9018         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9019         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9020         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9021       }
9022       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9023                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9024       CombinedInfo.Mappers.push_back(nullptr);
9025     }
9026   }
9027 
9028   /// Set correct indices for lambdas captures.
9029   void adjustMemberOfForLambdaCaptures(
9030       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9031       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9032       MapFlagsArrayTy &Types) const {
9033     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9034       // Set correct member_of idx for all implicit lambda captures.
9035       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9036                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9037         continue;
9038       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9039       assert(BasePtr && "Unable to find base lambda address.");
9040       int TgtIdx = -1;
9041       for (unsigned J = I; J > 0; --J) {
9042         unsigned Idx = J - 1;
9043         if (Pointers[Idx] != BasePtr)
9044           continue;
9045         TgtIdx = Idx;
9046         break;
9047       }
9048       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9049       // All other current entries will be MEMBER_OF the combined entry
9050       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9051       // 0xFFFF in the MEMBER_OF field).
9052       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9053       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9054     }
9055   }
9056 
9057   /// Generate the base pointers, section pointers, sizes, map types, and
9058   /// mappers associated to a given capture (all included in \a CombinedInfo).
9059   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9060                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9061                               StructRangeInfoTy &PartialStruct) const {
9062     assert(!Cap->capturesVariableArrayType() &&
9063            "Not expecting to generate map info for a variable array type!");
9064 
9065     // We need to know when we generating information for the first component
9066     const ValueDecl *VD = Cap->capturesThis()
9067                               ? nullptr
9068                               : Cap->getCapturedVar()->getCanonicalDecl();
9069 
9070     // If this declaration appears in a is_device_ptr clause we just have to
9071     // pass the pointer by value. If it is a reference to a declaration, we just
9072     // pass its value.
9073     if (DevPointersMap.count(VD)) {
9074       CombinedInfo.Exprs.push_back(VD);
9075       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9076       CombinedInfo.Pointers.push_back(Arg);
9077       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9078           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9079           /*isSigned=*/true));
9080       CombinedInfo.Types.push_back(
9081           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9082           OMP_MAP_TARGET_PARAM);
9083       CombinedInfo.Mappers.push_back(nullptr);
9084       return;
9085     }
9086 
9087     using MapData =
9088         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9089                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9090                    const ValueDecl *, const Expr *>;
9091     SmallVector<MapData, 4> DeclComponentLists;
9092     assert(CurDir.is<const OMPExecutableDirective *>() &&
9093            "Expect a executable directive");
9094     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9095     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9096       const auto *EI = C->getVarRefs().begin();
9097       for (const auto L : C->decl_component_lists(VD)) {
9098         const ValueDecl *VDecl, *Mapper;
9099         // The Expression is not correct if the mapping is implicit
9100         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9101         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9102         std::tie(VDecl, Components, Mapper) = L;
9103         assert(VDecl == VD && "We got information for the wrong declaration??");
9104         assert(!Components.empty() &&
9105                "Not expecting declaration with no component lists.");
9106         DeclComponentLists.emplace_back(Components, C->getMapType(),
9107                                         C->getMapTypeModifiers(),
9108                                         C->isImplicit(), Mapper, E);
9109         ++EI;
9110       }
9111     }
9112     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9113                                              const MapData &RHS) {
9114       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9115       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9116       bool HasPresent = !MapModifiers.empty() &&
9117                         llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
9118                           return K == clang::OMPC_MAP_MODIFIER_present;
9119                         });
9120       bool HasAllocs = MapType == OMPC_MAP_alloc;
9121       MapModifiers = std::get<2>(RHS);
9122       MapType = std::get<1>(LHS);
9123       bool HasPresentR =
9124           !MapModifiers.empty() &&
9125           llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
9126             return K == clang::OMPC_MAP_MODIFIER_present;
9127           });
9128       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9129       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9130     });
9131 
9132     // Find overlapping elements (including the offset from the base element).
9133     llvm::SmallDenseMap<
9134         const MapData *,
9135         llvm::SmallVector<
9136             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9137         4>
9138         OverlappedData;
9139     size_t Count = 0;
9140     for (const MapData &L : DeclComponentLists) {
9141       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9142       OpenMPMapClauseKind MapType;
9143       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9144       bool IsImplicit;
9145       const ValueDecl *Mapper;
9146       const Expr *VarRef;
9147       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9148           L;
9149       ++Count;
9150       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9151         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9152         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9153                  VarRef) = L1;
9154         auto CI = Components.rbegin();
9155         auto CE = Components.rend();
9156         auto SI = Components1.rbegin();
9157         auto SE = Components1.rend();
9158         for (; CI != CE && SI != SE; ++CI, ++SI) {
9159           if (CI->getAssociatedExpression()->getStmtClass() !=
9160               SI->getAssociatedExpression()->getStmtClass())
9161             break;
9162           // Are we dealing with different variables/fields?
9163           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9164             break;
9165         }
9166         // Found overlapping if, at least for one component, reached the head
9167         // of the components list.
9168         if (CI == CE || SI == SE) {
9169           // Ignore it if it is the same component.
9170           if (CI == CE && SI == SE)
9171             continue;
9172           const auto It = (SI == SE) ? CI : SI;
9173           // If one component is a pointer and another one is a kind of
9174           // dereference of this pointer (array subscript, section, dereference,
9175           // etc.), it is not an overlapping.
9176           // Same, if one component is a base and another component is a
9177           // dereferenced pointer memberexpr with the same base.
9178           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9179               (std::prev(It)->getAssociatedDeclaration() &&
9180                std::prev(It)
9181                    ->getAssociatedDeclaration()
9182                    ->getType()
9183                    ->isPointerType()) ||
9184               (It->getAssociatedDeclaration() &&
9185                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9186                std::next(It) != CE && std::next(It) != SE))
9187             continue;
9188           const MapData &BaseData = CI == CE ? L : L1;
9189           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9190               SI == SE ? Components : Components1;
9191           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9192           OverlappedElements.getSecond().push_back(SubData);
9193         }
9194       }
9195     }
9196     // Sort the overlapped elements for each item.
9197     llvm::SmallVector<const FieldDecl *, 4> Layout;
9198     if (!OverlappedData.empty()) {
9199       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9200       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9201       while (BaseType != OrigType) {
9202         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9203         OrigType = BaseType->getPointeeOrArrayElementType();
9204       }
9205 
9206       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9207         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9208       else {
9209         const auto *RD = BaseType->getAsRecordDecl();
9210         Layout.append(RD->field_begin(), RD->field_end());
9211       }
9212     }
9213     for (auto &Pair : OverlappedData) {
9214       llvm::stable_sort(
9215           Pair.getSecond(),
9216           [&Layout](
9217               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9218               OMPClauseMappableExprCommon::MappableExprComponentListRef
9219                   Second) {
9220             auto CI = First.rbegin();
9221             auto CE = First.rend();
9222             auto SI = Second.rbegin();
9223             auto SE = Second.rend();
9224             for (; CI != CE && SI != SE; ++CI, ++SI) {
9225               if (CI->getAssociatedExpression()->getStmtClass() !=
9226                   SI->getAssociatedExpression()->getStmtClass())
9227                 break;
9228               // Are we dealing with different variables/fields?
9229               if (CI->getAssociatedDeclaration() !=
9230                   SI->getAssociatedDeclaration())
9231                 break;
9232             }
9233 
9234             // Lists contain the same elements.
9235             if (CI == CE && SI == SE)
9236               return false;
9237 
9238             // List with less elements is less than list with more elements.
9239             if (CI == CE || SI == SE)
9240               return CI == CE;
9241 
9242             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9243             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9244             if (FD1->getParent() == FD2->getParent())
9245               return FD1->getFieldIndex() < FD2->getFieldIndex();
9246             const auto *It =
9247                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9248                   return FD == FD1 || FD == FD2;
9249                 });
9250             return *It == FD1;
9251           });
9252     }
9253 
9254     // Associated with a capture, because the mapping flags depend on it.
9255     // Go through all of the elements with the overlapped elements.
9256     bool IsFirstComponentList = true;
9257     for (const auto &Pair : OverlappedData) {
9258       const MapData &L = *Pair.getFirst();
9259       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9260       OpenMPMapClauseKind MapType;
9261       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9262       bool IsImplicit;
9263       const ValueDecl *Mapper;
9264       const Expr *VarRef;
9265       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9266           L;
9267       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9268           OverlappedComponents = Pair.getSecond();
9269       generateInfoForComponentList(
9270           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9271           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9272           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9273       IsFirstComponentList = false;
9274     }
9275     // Go through other elements without overlapped elements.
9276     for (const MapData &L : DeclComponentLists) {
9277       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9278       OpenMPMapClauseKind MapType;
9279       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9280       bool IsImplicit;
9281       const ValueDecl *Mapper;
9282       const Expr *VarRef;
9283       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9284           L;
9285       auto It = OverlappedData.find(&L);
9286       if (It == OverlappedData.end())
9287         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9288                                      Components, CombinedInfo, PartialStruct,
9289                                      IsFirstComponentList, IsImplicit, Mapper,
9290                                      /*ForDeviceAddr=*/false, VD, VarRef);
9291       IsFirstComponentList = false;
9292     }
9293   }
9294 
9295   /// Generate the default map information for a given capture \a CI,
9296   /// record field declaration \a RI and captured value \a CV.
9297   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9298                               const FieldDecl &RI, llvm::Value *CV,
9299                               MapCombinedInfoTy &CombinedInfo) const {
9300     bool IsImplicit = true;
9301     // Do the default mapping.
9302     if (CI.capturesThis()) {
9303       CombinedInfo.Exprs.push_back(nullptr);
9304       CombinedInfo.BasePointers.push_back(CV);
9305       CombinedInfo.Pointers.push_back(CV);
9306       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9307       CombinedInfo.Sizes.push_back(
9308           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9309                                     CGF.Int64Ty, /*isSigned=*/true));
9310       // Default map type.
9311       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9312     } else if (CI.capturesVariableByCopy()) {
9313       const VarDecl *VD = CI.getCapturedVar();
9314       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9315       CombinedInfo.BasePointers.push_back(CV);
9316       CombinedInfo.Pointers.push_back(CV);
9317       if (!RI.getType()->isAnyPointerType()) {
9318         // We have to signal to the runtime captures passed by value that are
9319         // not pointers.
9320         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9321         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9322             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9323       } else {
9324         // Pointers are implicitly mapped with a zero size and no flags
9325         // (other than first map that is added for all implicit maps).
9326         CombinedInfo.Types.push_back(OMP_MAP_NONE);
9327         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9328       }
9329       auto I = FirstPrivateDecls.find(VD);
9330       if (I != FirstPrivateDecls.end())
9331         IsImplicit = I->getSecond();
9332     } else {
9333       assert(CI.capturesVariable() && "Expected captured reference.");
9334       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9335       QualType ElementType = PtrTy->getPointeeType();
9336       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9337           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9338       // The default map type for a scalar/complex type is 'to' because by
9339       // default the value doesn't have to be retrieved. For an aggregate
9340       // type, the default is 'tofrom'.
9341       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9342       const VarDecl *VD = CI.getCapturedVar();
9343       auto I = FirstPrivateDecls.find(VD);
9344       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9345       CombinedInfo.BasePointers.push_back(CV);
9346       if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9347         Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9348             CV, ElementType, CGF.getContext().getDeclAlign(VD),
9349             AlignmentSource::Decl));
9350         CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9351       } else {
9352         CombinedInfo.Pointers.push_back(CV);
9353       }
9354       if (I != FirstPrivateDecls.end())
9355         IsImplicit = I->getSecond();
9356     }
9357     // Every default map produces a single argument which is a target parameter.
9358     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9359 
9360     // Add flag stating this is an implicit map.
9361     if (IsImplicit)
9362       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9363 
9364     // No user-defined mapper for default mapping.
9365     CombinedInfo.Mappers.push_back(nullptr);
9366   }
9367 };
9368 } // anonymous namespace
9369 
9370 static void emitNonContiguousDescriptor(
9371     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9372     CGOpenMPRuntime::TargetDataInfo &Info) {
9373   CodeGenModule &CGM = CGF.CGM;
9374   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9375       &NonContigInfo = CombinedInfo.NonContigInfo;
9376 
9377   // Build an array of struct descriptor_dim and then assign it to
9378   // offload_args.
9379   //
9380   // struct descriptor_dim {
9381   //  uint64_t offset;
9382   //  uint64_t count;
9383   //  uint64_t stride
9384   // };
9385   ASTContext &C = CGF.getContext();
9386   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9387   RecordDecl *RD;
9388   RD = C.buildImplicitRecord("descriptor_dim");
9389   RD->startDefinition();
9390   addFieldToRecordDecl(C, RD, Int64Ty);
9391   addFieldToRecordDecl(C, RD, Int64Ty);
9392   addFieldToRecordDecl(C, RD, Int64Ty);
9393   RD->completeDefinition();
9394   QualType DimTy = C.getRecordType(RD);
9395 
9396   enum { OffsetFD = 0, CountFD, StrideFD };
9397   // We need two index variable here since the size of "Dims" is the same as the
9398   // size of Components, however, the size of offset, count, and stride is equal
9399   // to the size of base declaration that is non-contiguous.
9400   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9401     // Skip emitting ir if dimension size is 1 since it cannot be
9402     // non-contiguous.
9403     if (NonContigInfo.Dims[I] == 1)
9404       continue;
9405     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9406     QualType ArrayTy =
9407         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9408     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9409     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9410       unsigned RevIdx = EE - II - 1;
9411       LValue DimsLVal = CGF.MakeAddrLValue(
9412           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9413       // Offset
9414       LValue OffsetLVal = CGF.EmitLValueForField(
9415           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9416       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9417       // Count
9418       LValue CountLVal = CGF.EmitLValueForField(
9419           DimsLVal, *std::next(RD->field_begin(), CountFD));
9420       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9421       // Stride
9422       LValue StrideLVal = CGF.EmitLValueForField(
9423           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9424       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9425     }
9426     // args[I] = &dims
9427     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9428         DimsAddr, CGM.Int8PtrTy);
9429     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9430         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9431         Info.PointersArray, 0, I);
9432     Address PAddr(P, CGF.getPointerAlign());
9433     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9434     ++L;
9435   }
9436 }
9437 
9438 // Try to extract the base declaration from a `this->x` expression if possible.
9439 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9440   if (!E)
9441     return nullptr;
9442 
9443   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9444     if (const MemberExpr *ME =
9445             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9446       return ME->getMemberDecl();
9447   return nullptr;
9448 }
9449 
9450 /// Emit a string constant containing the names of the values mapped to the
9451 /// offloading runtime library.
9452 llvm::Constant *
9453 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9454                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9455 
9456   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9457     return OMPBuilder.getOrCreateDefaultSrcLocStr();
9458 
9459   SourceLocation Loc;
9460   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9461     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9462       Loc = VD->getLocation();
9463     else
9464       Loc = MapExprs.getMapExpr()->getExprLoc();
9465   } else {
9466     Loc = MapExprs.getMapDecl()->getLocation();
9467   }
9468 
9469   std::string ExprName = "";
9470   if (MapExprs.getMapExpr()) {
9471     PrintingPolicy P(CGF.getContext().getLangOpts());
9472     llvm::raw_string_ostream OS(ExprName);
9473     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9474     OS.flush();
9475   } else {
9476     ExprName = MapExprs.getMapDecl()->getNameAsString();
9477   }
9478 
9479   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9480   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName.c_str(),
9481                                          PLoc.getLine(), PLoc.getColumn());
9482 }
9483 
9484 /// Emit the arrays used to pass the captures and map information to the
9485 /// offloading runtime library. If there is no map or capture information,
9486 /// return nullptr by reference.
9487 static void emitOffloadingArrays(
9488     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9489     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9490     bool IsNonContiguous = false) {
9491   CodeGenModule &CGM = CGF.CGM;
9492   ASTContext &Ctx = CGF.getContext();
9493 
9494   // Reset the array information.
9495   Info.clearArrayInfo();
9496   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9497 
9498   if (Info.NumberOfPtrs) {
9499     // Detect if we have any capture size requiring runtime evaluation of the
9500     // size so that a constant array could be eventually used.
9501     bool hasRuntimeEvaluationCaptureSize = false;
9502     for (llvm::Value *S : CombinedInfo.Sizes)
9503       if (!isa<llvm::Constant>(S)) {
9504         hasRuntimeEvaluationCaptureSize = true;
9505         break;
9506       }
9507 
9508     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9509     QualType PointerArrayType = Ctx.getConstantArrayType(
9510         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9511         /*IndexTypeQuals=*/0);
9512 
9513     Info.BasePointersArray =
9514         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9515     Info.PointersArray =
9516         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9517     Address MappersArray =
9518         CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9519     Info.MappersArray = MappersArray.getPointer();
9520 
9521     // If we don't have any VLA types or other types that require runtime
9522     // evaluation, we can use a constant array for the map sizes, otherwise we
9523     // need to fill up the arrays as we do for the pointers.
9524     QualType Int64Ty =
9525         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9526     if (hasRuntimeEvaluationCaptureSize) {
9527       QualType SizeArrayType = Ctx.getConstantArrayType(
9528           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9529           /*IndexTypeQuals=*/0);
9530       Info.SizesArray =
9531           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9532     } else {
9533       // We expect all the sizes to be constant, so we collect them to create
9534       // a constant array.
9535       SmallVector<llvm::Constant *, 16> ConstSizes;
9536       for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9537         if (IsNonContiguous &&
9538             (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
9539           ConstSizes.push_back(llvm::ConstantInt::get(
9540               CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
9541         } else {
9542           ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
9543         }
9544       }
9545 
9546       auto *SizesArrayInit = llvm::ConstantArray::get(
9547           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9548       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9549       auto *SizesArrayGbl = new llvm::GlobalVariable(
9550           CGM.getModule(), SizesArrayInit->getType(),
9551           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9552           SizesArrayInit, Name);
9553       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9554       Info.SizesArray = SizesArrayGbl;
9555     }
9556 
9557     // The map types are always constant so we don't need to generate code to
9558     // fill arrays. Instead, we create an array constant.
9559     SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9560     llvm::copy(CombinedInfo.Types, Mapping.begin());
9561     std::string MaptypesName =
9562         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9563     auto *MapTypesArrayGbl =
9564         OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9565     Info.MapTypesArray = MapTypesArrayGbl;
9566 
9567     // The information types are only built if there is debug information
9568     // requested.
9569     if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9570       Info.MapNamesArray = llvm::Constant::getNullValue(
9571           llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9572     } else {
9573       auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9574         return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9575       };
9576       SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9577       llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9578       std::string MapnamesName =
9579           CGM.getOpenMPRuntime().getName({"offload_mapnames"});
9580       auto *MapNamesArrayGbl =
9581           OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
9582       Info.MapNamesArray = MapNamesArrayGbl;
9583     }
9584 
9585     // If there's a present map type modifier, it must not be applied to the end
9586     // of a region, so generate a separate map type array in that case.
9587     if (Info.separateBeginEndCalls()) {
9588       bool EndMapTypesDiffer = false;
9589       for (uint64_t &Type : Mapping) {
9590         if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9591           Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9592           EndMapTypesDiffer = true;
9593         }
9594       }
9595       if (EndMapTypesDiffer) {
9596         MapTypesArrayGbl =
9597             OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9598         Info.MapTypesArrayEnd = MapTypesArrayGbl;
9599       }
9600     }
9601 
9602     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9603       llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9604       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9605           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9606           Info.BasePointersArray, 0, I);
9607       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9608           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9609       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9610       CGF.Builder.CreateStore(BPVal, BPAddr);
9611 
9612       if (Info.requiresDevicePointerInfo())
9613         if (const ValueDecl *DevVD =
9614                 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9615           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9616 
9617       llvm::Value *PVal = CombinedInfo.Pointers[I];
9618       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9619           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9620           Info.PointersArray, 0, I);
9621       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9622           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9623       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9624       CGF.Builder.CreateStore(PVal, PAddr);
9625 
9626       if (hasRuntimeEvaluationCaptureSize) {
9627         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9628             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9629             Info.SizesArray,
9630             /*Idx0=*/0,
9631             /*Idx1=*/I);
9632         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
9633         CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9634                                                           CGM.Int64Ty,
9635                                                           /*isSigned=*/true),
9636                                 SAddr);
9637       }
9638 
9639       // Fill up the mapper array.
9640       llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9641       if (CombinedInfo.Mappers[I]) {
9642         MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9643             cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9644         MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9645         Info.HasMapper = true;
9646       }
9647       Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9648       CGF.Builder.CreateStore(MFunc, MAddr);
9649     }
9650   }
9651 
9652   if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9653       Info.NumberOfPtrs == 0)
9654     return;
9655 
9656   emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9657 }
9658 
namespace {
/// Optional settings accepted by emitOffloadingArraysArgument.
struct ArgumentsOptions {
  /// Whether the arguments are being emitted for an end-of-region call.
  bool ForEndCall = false;

  /// Intentionally implicit so that a plain bool (or a braced bool) can be
  /// passed wherever the options are expected; defaults to a non-end call.
  ArgumentsOptions(bool ForEndCall = false) : ForEndCall(ForEndCall) {}
};
} // namespace
9667 
9668 /// Emit the arguments to be passed to the runtime library based on the
9669 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9670 /// ForEndCall, emit map types to be passed for the end of the region instead of
9671 /// the beginning.
9672 static void emitOffloadingArraysArgument(
9673     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9674     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9675     llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9676     llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9677     const ArgumentsOptions &Options = ArgumentsOptions()) {
9678   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9679          "expected region end call to runtime only when end call is separate");
9680   CodeGenModule &CGM = CGF.CGM;
9681   if (Info.NumberOfPtrs) {
9682     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9683         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9684         Info.BasePointersArray,
9685         /*Idx0=*/0, /*Idx1=*/0);
9686     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9687         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9688         Info.PointersArray,
9689         /*Idx0=*/0,
9690         /*Idx1=*/0);
9691     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9692         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9693         /*Idx0=*/0, /*Idx1=*/0);
9694     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9695         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9696         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9697                                                     : Info.MapTypesArray,
9698         /*Idx0=*/0,
9699         /*Idx1=*/0);
9700 
9701     // Only emit the mapper information arrays if debug information is
9702     // requested.
9703     if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9704       MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9705     else
9706       MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9707           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9708           Info.MapNamesArray,
9709           /*Idx0=*/0,
9710           /*Idx1=*/0);
9711     // If there is no user-defined mapper, set the mapper array to nullptr to
9712     // avoid an unnecessary data privatization
9713     if (!Info.HasMapper)
9714       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9715     else
9716       MappersArrayArg =
9717           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9718   } else {
9719     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9720     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9721     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9722     MapTypesArrayArg =
9723         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9724     MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9725     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9726   }
9727 }
9728 
9729 /// Check for inner distribute directive.
9730 static const OMPExecutableDirective *
9731 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9732   const auto *CS = D.getInnermostCapturedStmt();
9733   const auto *Body =
9734       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9735   const Stmt *ChildStmt =
9736       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9737 
9738   if (const auto *NestedDir =
9739           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9740     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9741     switch (D.getDirectiveKind()) {
9742     case OMPD_target:
9743       if (isOpenMPDistributeDirective(DKind))
9744         return NestedDir;
9745       if (DKind == OMPD_teams) {
9746         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9747             /*IgnoreCaptured=*/true);
9748         if (!Body)
9749           return nullptr;
9750         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9751         if (const auto *NND =
9752                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9753           DKind = NND->getDirectiveKind();
9754           if (isOpenMPDistributeDirective(DKind))
9755             return NND;
9756         }
9757       }
9758       return nullptr;
9759     case OMPD_target_teams:
9760       if (isOpenMPDistributeDirective(DKind))
9761         return NestedDir;
9762       return nullptr;
9763     case OMPD_target_parallel:
9764     case OMPD_target_simd:
9765     case OMPD_target_parallel_for:
9766     case OMPD_target_parallel_for_simd:
9767       return nullptr;
9768     case OMPD_target_teams_distribute:
9769     case OMPD_target_teams_distribute_simd:
9770     case OMPD_target_teams_distribute_parallel_for:
9771     case OMPD_target_teams_distribute_parallel_for_simd:
9772     case OMPD_parallel:
9773     case OMPD_for:
9774     case OMPD_parallel_for:
9775     case OMPD_parallel_master:
9776     case OMPD_parallel_sections:
9777     case OMPD_for_simd:
9778     case OMPD_parallel_for_simd:
9779     case OMPD_cancel:
9780     case OMPD_cancellation_point:
9781     case OMPD_ordered:
9782     case OMPD_threadprivate:
9783     case OMPD_allocate:
9784     case OMPD_task:
9785     case OMPD_simd:
9786     case OMPD_tile:
9787     case OMPD_unroll:
9788     case OMPD_sections:
9789     case OMPD_section:
9790     case OMPD_single:
9791     case OMPD_master:
9792     case OMPD_critical:
9793     case OMPD_taskyield:
9794     case OMPD_barrier:
9795     case OMPD_taskwait:
9796     case OMPD_taskgroup:
9797     case OMPD_atomic:
9798     case OMPD_flush:
9799     case OMPD_depobj:
9800     case OMPD_scan:
9801     case OMPD_teams:
9802     case OMPD_target_data:
9803     case OMPD_target_exit_data:
9804     case OMPD_target_enter_data:
9805     case OMPD_distribute:
9806     case OMPD_distribute_simd:
9807     case OMPD_distribute_parallel_for:
9808     case OMPD_distribute_parallel_for_simd:
9809     case OMPD_teams_distribute:
9810     case OMPD_teams_distribute_simd:
9811     case OMPD_teams_distribute_parallel_for:
9812     case OMPD_teams_distribute_parallel_for_simd:
9813     case OMPD_target_update:
9814     case OMPD_declare_simd:
9815     case OMPD_declare_variant:
9816     case OMPD_begin_declare_variant:
9817     case OMPD_end_declare_variant:
9818     case OMPD_declare_target:
9819     case OMPD_end_declare_target:
9820     case OMPD_declare_reduction:
9821     case OMPD_declare_mapper:
9822     case OMPD_taskloop:
9823     case OMPD_taskloop_simd:
9824     case OMPD_master_taskloop:
9825     case OMPD_master_taskloop_simd:
9826     case OMPD_parallel_master_taskloop:
9827     case OMPD_parallel_master_taskloop_simd:
9828     case OMPD_requires:
9829     case OMPD_unknown:
9830     default:
9831       llvm_unreachable("Unexpected directive.");
9832     }
9833   }
9834 
9835   return nullptr;
9836 }
9837 
9838 /// Emit the user-defined mapper function. The code generation follows the
9839 /// pattern in the example below.
9840 /// \code
9841 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9842 ///                                           void *base, void *begin,
9843 ///                                           int64_t size, int64_t type,
9844 ///                                           void *name = nullptr) {
9845 ///   // Allocate space for an array section first or add a base/begin for
9846 ///   // pointer dereference.
9847 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9848 ///       !maptype.IsDelete)
9849 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9850 ///                                 size*sizeof(Ty), clearToFromMember(type));
9851 ///   // Map members.
9852 ///   for (unsigned i = 0; i < size; i++) {
9853 ///     // For each component specified by this mapper:
9854 ///     for (auto c : begin[i]->all_components) {
9855 ///       if (c.hasMapper())
9856 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9857 ///                       c.arg_type, c.arg_name);
9858 ///       else
9859 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9860 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9861 ///                                     c.arg_name);
9862 ///     }
9863 ///   }
9864 ///   // Delete the array section.
9865 ///   if (size > 1 && maptype.IsDelete)
9866 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9867 ///                                 size*sizeof(Ty), clearToFromMember(type));
9868 /// }
9869 /// \endcode
9870 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9871                                             CodeGenFunction *CGF) {
9872   if (UDMMap.count(D) > 0)
9873     return;
9874   ASTContext &C = CGM.getContext();
9875   QualType Ty = D->getType();
9876   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9877   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9878   auto *MapperVarDecl =
9879       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9880   SourceLocation Loc = D->getLocation();
9881   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9882 
9883   // Prepare mapper function arguments and attributes.
9884   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9885                               C.VoidPtrTy, ImplicitParamDecl::Other);
9886   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9887                             ImplicitParamDecl::Other);
9888   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9889                              C.VoidPtrTy, ImplicitParamDecl::Other);
9890   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9891                             ImplicitParamDecl::Other);
9892   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9893                             ImplicitParamDecl::Other);
9894   ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9895                             ImplicitParamDecl::Other);
9896   FunctionArgList Args;
9897   Args.push_back(&HandleArg);
9898   Args.push_back(&BaseArg);
9899   Args.push_back(&BeginArg);
9900   Args.push_back(&SizeArg);
9901   Args.push_back(&TypeArg);
9902   Args.push_back(&NameArg);
9903   const CGFunctionInfo &FnInfo =
9904       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9905   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9906   SmallString<64> TyStr;
9907   llvm::raw_svector_ostream Out(TyStr);
9908   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9909   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9910   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9911                                     Name, &CGM.getModule());
9912   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9913   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9914   // Start the mapper function code generation.
9915   CodeGenFunction MapperCGF(CGM);
9916   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9917   // Compute the starting and end addresses of array elements.
9918   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9919       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9920       C.getPointerType(Int64Ty), Loc);
9921   // Prepare common arguments for array initiation and deletion.
9922   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9923       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9924       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9925   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9926       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9927       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9928   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9929       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9930       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9931   // Convert the size in bytes into the number of array elements.
9932   Size = MapperCGF.Builder.CreateExactUDiv(
9933       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9934   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9935       BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9936   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
9937       PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
9938   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9939       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9940       C.getPointerType(Int64Ty), Loc);
9941   llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9942       MapperCGF.GetAddrOfLocalVar(&NameArg),
9943       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9944 
9945   // Emit array initiation if this is an array section and \p MapType indicates
9946   // that memory allocation is required.
9947   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9948   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9949                              MapName, ElementSize, HeadBB, /*IsInit=*/true);
9950 
9951   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9952 
9953   // Emit the loop header block.
9954   MapperCGF.EmitBlock(HeadBB);
9955   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9956   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9957   // Evaluate whether the initial condition is satisfied.
9958   llvm::Value *IsEmpty =
9959       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9960   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9961   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9962 
9963   // Emit the loop body block.
9964   MapperCGF.EmitBlock(BodyBB);
9965   llvm::BasicBlock *LastBB = BodyBB;
9966   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9967       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9968   PtrPHI->addIncoming(PtrBegin, EntryBB);
9969   Address PtrCurrent =
9970       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9971                           .getAlignment()
9972                           .alignmentOfArrayElement(ElementSize));
9973   // Privatize the declared variable of mapper to be the current array element.
9974   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9975   Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
9976   (void)Scope.Privatize();
9977 
9978   // Get map clause information. Fill up the arrays with all mapped variables.
9979   MappableExprsHandler::MapCombinedInfoTy Info;
9980   MappableExprsHandler MEHandler(*D, MapperCGF);
9981   MEHandler.generateAllInfoForMapper(Info);
9982 
9983   // Call the runtime API __tgt_mapper_num_components to get the number of
9984   // pre-existing components.
9985   llvm::Value *OffloadingArgs[] = {Handle};
9986   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9987       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9988                                             OMPRTL___tgt_mapper_num_components),
9989       OffloadingArgs);
9990   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9991       PreviousSize,
9992       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9993 
9994   // Fill up the runtime mapper handle for all components.
9995   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9996     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9997         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9998     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9999         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
10000     llvm::Value *CurSizeArg = Info.Sizes[I];
10001     llvm::Value *CurNameArg =
10002         (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
10003             ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
10004             : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
10005 
10006     // Extract the MEMBER_OF field from the map type.
10007     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
10008     llvm::Value *MemberMapType =
10009         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
10010 
10011     // Combine the map type inherited from user-defined mapper with that
10012     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
10013     // bits of the \a MapType, which is the input argument of the mapper
10014     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
10015     // bits of MemberMapType.
10016     // [OpenMP 5.0], 1.2.6. map-type decay.
10017     //        | alloc |  to   | from  | tofrom | release | delete
10018     // ----------------------------------------------------------
10019     // alloc  | alloc | alloc | alloc | alloc  | release | delete
10020     // to     | alloc |  to   | alloc |   to   | release | delete
10021     // from   | alloc | alloc | from  |  from  | release | delete
10022     // tofrom | alloc |  to   | from  | tofrom | release | delete
10023     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
10024         MapType,
10025         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
10026                                    MappableExprsHandler::OMP_MAP_FROM));
10027     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
10028     llvm::BasicBlock *AllocElseBB =
10029         MapperCGF.createBasicBlock("omp.type.alloc.else");
10030     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
10031     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
10032     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
10033     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
10034     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
10035     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
10036     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
10037     MapperCGF.EmitBlock(AllocBB);
10038     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
10039         MemberMapType,
10040         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10041                                      MappableExprsHandler::OMP_MAP_FROM)));
10042     MapperCGF.Builder.CreateBr(EndBB);
10043     MapperCGF.EmitBlock(AllocElseBB);
10044     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
10045         LeftToFrom,
10046         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
10047     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
10048     // In case of to, clear OMP_MAP_FROM.
10049     MapperCGF.EmitBlock(ToBB);
10050     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
10051         MemberMapType,
10052         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
10053     MapperCGF.Builder.CreateBr(EndBB);
10054     MapperCGF.EmitBlock(ToElseBB);
10055     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
10056         LeftToFrom,
10057         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
10058     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
10059     // In case of from, clear OMP_MAP_TO.
10060     MapperCGF.EmitBlock(FromBB);
10061     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
10062         MemberMapType,
10063         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
10064     // In case of tofrom, do nothing.
10065     MapperCGF.EmitBlock(EndBB);
10066     LastBB = EndBB;
10067     llvm::PHINode *CurMapType =
10068         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
10069     CurMapType->addIncoming(AllocMapType, AllocBB);
10070     CurMapType->addIncoming(ToMapType, ToBB);
10071     CurMapType->addIncoming(FromMapType, FromBB);
10072     CurMapType->addIncoming(MemberMapType, ToElseBB);
10073 
10074     llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
10075                                      CurSizeArg, CurMapType, CurNameArg};
10076     if (Info.Mappers[I]) {
10077       // Call the corresponding mapper function.
10078       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
10079           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
10080       assert(MapperFunc && "Expect a valid mapper function is available.");
10081       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
10082     } else {
10083       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10084       // data structure.
10085       MapperCGF.EmitRuntimeCall(
10086           OMPBuilder.getOrCreateRuntimeFunction(
10087               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
10088           OffloadingArgs);
10089     }
10090   }
10091 
10092   // Update the pointer to point to the next element that needs to be mapped,
10093   // and check whether we have mapped all elements.
10094   llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
10095   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
10096       ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
10097   PtrPHI->addIncoming(PtrNext, LastBB);
10098   llvm::Value *IsDone =
10099       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
10100   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
10101   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10102 
10103   MapperCGF.EmitBlock(ExitBB);
10104   // Emit array deletion if this is an array section and \p MapType indicates
10105   // that deletion is required.
10106   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
10107                              MapName, ElementSize, DoneBB, /*IsInit=*/false);
10108 
10109   // Emit the function exit block.
10110   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
10111   MapperCGF.FinishFunction();
10112   UDMMap.try_emplace(D, Fn);
10113   if (CGF) {
10114     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
10115     Decls.second.push_back(D);
10116   }
10117 }
10118 
10119 /// Emit the array initialization or deletion portion for user-defined mapper
10120 /// code generation. First, it evaluates whether an array section is mapped and
10121 /// whether the \a MapType instructs to delete this section. If \a IsInit is
10122 /// true, and \a MapType indicates to not delete this array, array
10123 /// initialization code is generated. If \a IsInit is false, and \a MapType
10124 /// indicates to not this array, array deletion code is generated.
10125 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
10126     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
10127     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
10128     llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
10129     bool IsInit) {
10130   StringRef Prefix = IsInit ? ".init" : ".del";
10131 
10132   // Evaluate if this is an array section.
10133   llvm::BasicBlock *BodyBB =
10134       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
10135   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
10136       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
10137   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
10138       MapType,
10139       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
10140   llvm::Value *DeleteCond;
10141   llvm::Value *Cond;
10142   if (IsInit) {
10143     // base != begin?
10144     llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
10145         MapperCGF.Builder.CreatePtrDiff(Base, Begin));
10146     // IsPtrAndObj?
10147     llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
10148         MapType,
10149         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
10150     PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
10151     BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
10152     Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
10153     DeleteCond = MapperCGF.Builder.CreateIsNull(
10154         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10155   } else {
10156     Cond = IsArray;
10157     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
10158         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10159   }
10160   Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
10161   MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
10162 
10163   MapperCGF.EmitBlock(BodyBB);
10164   // Get the array size by multiplying element size and element number (i.e., \p
10165   // Size).
10166   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
10167       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
10168   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
10169   // memory allocation/deletion purpose only.
10170   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
10171       MapType,
10172       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10173                                    MappableExprsHandler::OMP_MAP_FROM)));
10174   MapTypeArg = MapperCGF.Builder.CreateOr(
10175       MapTypeArg,
10176       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));
10177 
10178   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10179   // data structure.
10180   llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
10181                                    ArraySize, MapTypeArg, MapName};
10182   MapperCGF.EmitRuntimeCall(
10183       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10184                                             OMPRTL___tgt_push_mapper_component),
10185       OffloadingArgs);
10186 }
10187 
10188 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10189     const OMPDeclareMapperDecl *D) {
10190   auto I = UDMMap.find(D);
10191   if (I != UDMMap.end())
10192     return I->second;
10193   emitUserDefinedMapper(D);
10194   return UDMMap.lookup(D);
10195 }
10196 
10197 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10198     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10199     llvm::Value *DeviceID,
10200     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10201                                      const OMPLoopDirective &D)>
10202         SizeEmitter) {
10203   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10204   const OMPExecutableDirective *TD = &D;
10205   // Get nested teams distribute kind directive, if any.
10206   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10207     TD = getNestedDistributeDirective(CGM.getContext(), D);
10208   if (!TD)
10209     return;
10210   const auto *LD = cast<OMPLoopDirective>(TD);
10211   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10212                                                          PrePostActionTy &) {
10213     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10214       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10215       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10216       CGF.EmitRuntimeCall(
10217           OMPBuilder.getOrCreateRuntimeFunction(
10218               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10219           Args);
10220     }
10221   };
10222   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10223 }
10224 
10225 void CGOpenMPRuntime::emitTargetCall(
10226     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10227     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10228     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10229     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10230                                      const OMPLoopDirective &D)>
10231         SizeEmitter) {
10232   if (!CGF.HaveInsertPoint())
10233     return;
10234 
10235   assert(OutlinedFn && "Invalid outlined function!");
10236 
10237   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10238                                  D.hasClausesOfKind<OMPNowaitClause>();
10239   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10240   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10241   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10242                                             PrePostActionTy &) {
10243     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10244   };
10245   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10246 
10247   CodeGenFunction::OMPTargetDataInfo InputInfo;
10248   llvm::Value *MapTypesArray = nullptr;
10249   llvm::Value *MapNamesArray = nullptr;
10250   // Fill up the pointer arrays and transfer execution to the device.
10251   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
10252                     &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
10253                     &CapturedVars,
10254                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10255     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10256       // Reverse offloading is not supported, so just execute on the host.
10257       if (RequiresOuterTask) {
10258         CapturedVars.clear();
10259         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10260       }
10261       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10262       return;
10263     }
10264 
10265     // On top of the arrays that were filled up, the target offloading call
10266     // takes as arguments the device id as well as the host pointer. The host
10267     // pointer is used by the runtime library to identify the current target
10268     // region, so it only has to be unique and not necessarily point to
10269     // anything. It could be the pointer to the outlined function that
10270     // implements the target region, but we aren't using that so that the
10271     // compiler doesn't need to keep that, and could therefore inline the host
10272     // function if proven worthwhile during optimization.
10273 
10274     // From this point on, we need to have an ID of the target region defined.
10275     assert(OutlinedFnID && "Invalid outlined function ID!");
10276 
10277     // Emit device ID if any.
10278     llvm::Value *DeviceID;
10279     if (Device.getPointer()) {
10280       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10281               Device.getInt() == OMPC_DEVICE_device_num) &&
10282              "Expected device_num modifier.");
10283       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10284       DeviceID =
10285           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10286     } else {
10287       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10288     }
10289 
10290     // Emit the number of elements in the offloading arrays.
10291     llvm::Value *PointerNum =
10292         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10293 
10294     // Return value of the runtime offloading call.
10295     llvm::Value *Return;
10296 
10297     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10298     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10299 
10300     // Source location for the ident struct
10301     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10302 
10303     // Emit tripcount for the target loop-based directive.
10304     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10305 
10306     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10307     // The target region is an outlined function launched by the runtime
10308     // via calls __tgt_target() or __tgt_target_teams().
10309     //
10310     // __tgt_target() launches a target region with one team and one thread,
10311     // executing a serial region.  This master thread may in turn launch
10312     // more threads within its team upon encountering a parallel region,
10313     // however, no additional teams can be launched on the device.
10314     //
10315     // __tgt_target_teams() launches a target region with one or more teams,
10316     // each with one or more threads.  This call is required for target
10317     // constructs such as:
10318     //  'target teams'
10319     //  'target' / 'teams'
10320     //  'target teams distribute parallel for'
10321     //  'target parallel'
10322     // and so on.
10323     //
10324     // Note that on the host and CPU targets, the runtime implementation of
10325     // these calls simply call the outlined function without forking threads.
10326     // The outlined functions themselves have runtime calls to
10327     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10328     // the compiler in emitTeamsCall() and emitParallelCall().
10329     //
10330     // In contrast, on the NVPTX target, the implementation of
10331     // __tgt_target_teams() launches a GPU kernel with the requested number
10332     // of teams and threads so no additional calls to the runtime are required.
10333     if (NumTeams) {
10334       // If we have NumTeams defined this means that we have an enclosed teams
10335       // region. Therefore we also expect to have NumThreads defined. These two
10336       // values should be defined in the presence of a teams directive,
10337       // regardless of having any clauses associated. If the user is using teams
10338       // but no clauses, these two values will be the default that should be
10339       // passed to the runtime library - a 32-bit integer with the value zero.
10340       assert(NumThreads && "Thread limit expression should be available along "
10341                            "with number of teams.");
10342       SmallVector<llvm::Value *> OffloadingArgs = {
10343           RTLoc,
10344           DeviceID,
10345           OutlinedFnID,
10346           PointerNum,
10347           InputInfo.BasePointersArray.getPointer(),
10348           InputInfo.PointersArray.getPointer(),
10349           InputInfo.SizesArray.getPointer(),
10350           MapTypesArray,
10351           MapNamesArray,
10352           InputInfo.MappersArray.getPointer(),
10353           NumTeams,
10354           NumThreads};
10355       if (HasNowait) {
10356         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10357         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10358         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10359         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10360         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10361         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10362       }
10363       Return = CGF.EmitRuntimeCall(
10364           OMPBuilder.getOrCreateRuntimeFunction(
10365               CGM.getModule(), HasNowait
10366                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10367                                    : OMPRTL___tgt_target_teams_mapper),
10368           OffloadingArgs);
10369     } else {
10370       SmallVector<llvm::Value *> OffloadingArgs = {
10371           RTLoc,
10372           DeviceID,
10373           OutlinedFnID,
10374           PointerNum,
10375           InputInfo.BasePointersArray.getPointer(),
10376           InputInfo.PointersArray.getPointer(),
10377           InputInfo.SizesArray.getPointer(),
10378           MapTypesArray,
10379           MapNamesArray,
10380           InputInfo.MappersArray.getPointer()};
10381       if (HasNowait) {
10382         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10383         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10384         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10385         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10386         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10387         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10388       }
10389       Return = CGF.EmitRuntimeCall(
10390           OMPBuilder.getOrCreateRuntimeFunction(
10391               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10392                                          : OMPRTL___tgt_target_mapper),
10393           OffloadingArgs);
10394     }
10395 
10396     // Check the error code and execute the host version if required.
10397     llvm::BasicBlock *OffloadFailedBlock =
10398         CGF.createBasicBlock("omp_offload.failed");
10399     llvm::BasicBlock *OffloadContBlock =
10400         CGF.createBasicBlock("omp_offload.cont");
10401     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10402     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10403 
10404     CGF.EmitBlock(OffloadFailedBlock);
10405     if (RequiresOuterTask) {
10406       CapturedVars.clear();
10407       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10408     }
10409     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10410     CGF.EmitBranch(OffloadContBlock);
10411 
10412     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10413   };
10414 
10415   // Notify that the host version must be executed.
10416   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
10417                     RequiresOuterTask](CodeGenFunction &CGF,
10418                                        PrePostActionTy &) {
10419     if (RequiresOuterTask) {
10420       CapturedVars.clear();
10421       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10422     }
10423     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10424   };
10425 
10426   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10427                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10428                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10429     // Fill up the arrays with all the captured variables.
10430     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10431 
10432     // Get mappable expression information.
10433     MappableExprsHandler MEHandler(D, CGF);
10434     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10435     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10436 
10437     auto RI = CS.getCapturedRecordDecl()->field_begin();
10438     auto *CV = CapturedVars.begin();
10439     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10440                                               CE = CS.capture_end();
10441          CI != CE; ++CI, ++RI, ++CV) {
10442       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10443       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10444 
10445       // VLA sizes are passed to the outlined region by copy and do not have map
10446       // information associated.
10447       if (CI->capturesVariableArrayType()) {
10448         CurInfo.Exprs.push_back(nullptr);
10449         CurInfo.BasePointers.push_back(*CV);
10450         CurInfo.Pointers.push_back(*CV);
10451         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10452             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10453         // Copy to the device as an argument. No need to retrieve it.
10454         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10455                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10456                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10457         CurInfo.Mappers.push_back(nullptr);
10458       } else {
10459         // If we have any information in the map clause, we use it, otherwise we
10460         // just do a default mapping.
10461         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10462         if (!CI->capturesThis())
10463           MappedVarSet.insert(CI->getCapturedVar());
10464         else
10465           MappedVarSet.insert(nullptr);
10466         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10467           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10468         // Generate correct mapping for variables captured by reference in
10469         // lambdas.
10470         if (CI->capturesVariable())
10471           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10472                                                   CurInfo, LambdaPointers);
10473       }
10474       // We expect to have at least an element of information for this capture.
10475       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10476              "Non-existing map pointer for capture!");
10477       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10478              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10479              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10480              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10481              "Inconsistent map information sizes!");
10482 
10483       // If there is an entry in PartialStruct it means we have a struct with
10484       // individual members mapped. Emit an extra combined entry.
10485       if (PartialStruct.Base.isValid()) {
10486         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10487         MEHandler.emitCombinedEntry(
10488             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10489             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10490       }
10491 
10492       // We need to append the results of this capture to what we already have.
10493       CombinedInfo.append(CurInfo);
10494     }
10495     // Adjust MEMBER_OF flags for the lambdas captures.
10496     MEHandler.adjustMemberOfForLambdaCaptures(
10497         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10498         CombinedInfo.Types);
10499     // Map any list items in a map clause that were not captures because they
10500     // weren't referenced within the construct.
10501     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10502 
10503     TargetDataInfo Info;
10504     // Fill up the arrays and create the arguments.
10505     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10506     emitOffloadingArraysArgument(
10507         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10508         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10509         {/*ForEndTask=*/false});
10510 
10511     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10512     InputInfo.BasePointersArray =
10513         Address(Info.BasePointersArray, CGM.getPointerAlign());
10514     InputInfo.PointersArray =
10515         Address(Info.PointersArray, CGM.getPointerAlign());
10516     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
10517     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
10518     MapTypesArray = Info.MapTypesArray;
10519     MapNamesArray = Info.MapNamesArray;
10520     if (RequiresOuterTask)
10521       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10522     else
10523       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10524   };
10525 
10526   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10527                              CodeGenFunction &CGF, PrePostActionTy &) {
10528     if (RequiresOuterTask) {
10529       CodeGenFunction::OMPTargetDataInfo InputInfo;
10530       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10531     } else {
10532       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10533     }
10534   };
10535 
10536   // If we have a target function ID it means that we need to support
10537   // offloading, otherwise, just execute on the host. We need to execute on host
10538   // regardless of the conditional in the if clause if, e.g., the user do not
10539   // specify target triples.
10540   if (OutlinedFnID) {
10541     if (IfCond) {
10542       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10543     } else {
10544       RegionCodeGenTy ThenRCG(TargetThenGen);
10545       ThenRCG(CGF);
10546     }
10547   } else {
10548     RegionCodeGenTy ElseRCG(TargetElseGen);
10549     ElseRCG(CGF);
10550   }
10551 }
10552 
10553 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10554                                                     StringRef ParentName) {
10555   if (!S)
10556     return;
10557 
10558   // Codegen OMP target directives that offload compute to the device.
10559   bool RequiresDeviceCodegen =
10560       isa<OMPExecutableDirective>(S) &&
10561       isOpenMPTargetExecutionDirective(
10562           cast<OMPExecutableDirective>(S)->getDirectiveKind());
10563 
10564   if (RequiresDeviceCodegen) {
10565     const auto &E = *cast<OMPExecutableDirective>(S);
10566     unsigned DeviceID;
10567     unsigned FileID;
10568     unsigned Line;
10569     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10570                              FileID, Line);
10571 
10572     // Is this a target region that should not be emitted as an entry point? If
10573     // so just signal we are done with this target region.
10574     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10575                                                             ParentName, Line))
10576       return;
10577 
10578     switch (E.getDirectiveKind()) {
10579     case OMPD_target:
10580       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10581                                                    cast<OMPTargetDirective>(E));
10582       break;
10583     case OMPD_target_parallel:
10584       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10585           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10586       break;
10587     case OMPD_target_teams:
10588       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10589           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10590       break;
10591     case OMPD_target_teams_distribute:
10592       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10593           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10594       break;
10595     case OMPD_target_teams_distribute_simd:
10596       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10597           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10598       break;
10599     case OMPD_target_parallel_for:
10600       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10601           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10602       break;
10603     case OMPD_target_parallel_for_simd:
10604       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10605           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10606       break;
10607     case OMPD_target_simd:
10608       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10609           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10610       break;
10611     case OMPD_target_teams_distribute_parallel_for:
10612       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10613           CGM, ParentName,
10614           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10615       break;
10616     case OMPD_target_teams_distribute_parallel_for_simd:
10617       CodeGenFunction::
10618           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10619               CGM, ParentName,
10620               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10621       break;
10622     case OMPD_parallel:
10623     case OMPD_for:
10624     case OMPD_parallel_for:
10625     case OMPD_parallel_master:
10626     case OMPD_parallel_sections:
10627     case OMPD_for_simd:
10628     case OMPD_parallel_for_simd:
10629     case OMPD_cancel:
10630     case OMPD_cancellation_point:
10631     case OMPD_ordered:
10632     case OMPD_threadprivate:
10633     case OMPD_allocate:
10634     case OMPD_task:
10635     case OMPD_simd:
10636     case OMPD_tile:
10637     case OMPD_unroll:
10638     case OMPD_sections:
10639     case OMPD_section:
10640     case OMPD_single:
10641     case OMPD_master:
10642     case OMPD_critical:
10643     case OMPD_taskyield:
10644     case OMPD_barrier:
10645     case OMPD_taskwait:
10646     case OMPD_taskgroup:
10647     case OMPD_atomic:
10648     case OMPD_flush:
10649     case OMPD_depobj:
10650     case OMPD_scan:
10651     case OMPD_teams:
10652     case OMPD_target_data:
10653     case OMPD_target_exit_data:
10654     case OMPD_target_enter_data:
10655     case OMPD_distribute:
10656     case OMPD_distribute_simd:
10657     case OMPD_distribute_parallel_for:
10658     case OMPD_distribute_parallel_for_simd:
10659     case OMPD_teams_distribute:
10660     case OMPD_teams_distribute_simd:
10661     case OMPD_teams_distribute_parallel_for:
10662     case OMPD_teams_distribute_parallel_for_simd:
10663     case OMPD_target_update:
10664     case OMPD_declare_simd:
10665     case OMPD_declare_variant:
10666     case OMPD_begin_declare_variant:
10667     case OMPD_end_declare_variant:
10668     case OMPD_declare_target:
10669     case OMPD_end_declare_target:
10670     case OMPD_declare_reduction:
10671     case OMPD_declare_mapper:
10672     case OMPD_taskloop:
10673     case OMPD_taskloop_simd:
10674     case OMPD_master_taskloop:
10675     case OMPD_master_taskloop_simd:
10676     case OMPD_parallel_master_taskloop:
10677     case OMPD_parallel_master_taskloop_simd:
10678     case OMPD_requires:
10679     case OMPD_unknown:
10680     default:
10681       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10682     }
10683     return;
10684   }
10685 
10686   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10687     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10688       return;
10689 
10690     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10691     return;
10692   }
10693 
10694   // If this is a lambda function, look into its body.
10695   if (const auto *L = dyn_cast<LambdaExpr>(S))
10696     S = L->getBody();
10697 
10698   // Keep looking for target regions recursively.
10699   for (const Stmt *II : S->children())
10700     scanForTargetRegionsFunctions(II, ParentName);
10701 }
10702 
10703 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10704   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10705       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10706   if (!DevTy)
10707     return false;
10708   // Do not emit device_type(nohost) functions for the host.
10709   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10710     return true;
10711   // Do not emit device_type(host) functions for the device.
10712   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10713     return true;
10714   return false;
10715 }
10716 
10717 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10718   // If emitting code for the host, we do not process FD here. Instead we do
10719   // the normal code generation.
10720   if (!CGM.getLangOpts().OpenMPIsDevice) {
10721     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10722       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10723                                   CGM.getLangOpts().OpenMPIsDevice))
10724         return true;
10725     return false;
10726   }
10727 
10728   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10729   // Try to detect target regions in the function.
10730   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10731     StringRef Name = CGM.getMangledName(GD);
10732     scanForTargetRegionsFunctions(FD->getBody(), Name);
10733     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10734                                 CGM.getLangOpts().OpenMPIsDevice))
10735       return true;
10736   }
10737 
10738   // Do not to emit function if it is not marked as declare target.
10739   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10740          AlreadyEmittedTargetDecls.count(VD) == 0;
10741 }
10742 
10743 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10744   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10745                               CGM.getLangOpts().OpenMPIsDevice))
10746     return true;
10747 
10748   if (!CGM.getLangOpts().OpenMPIsDevice)
10749     return false;
10750 
10751   // Check if there are Ctors/Dtors in this declaration and look for target
10752   // regions in it. We use the complete variant to produce the kernel name
10753   // mangling.
10754   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10755   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10756     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10757       StringRef ParentName =
10758           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10759       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10760     }
10761     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10762       StringRef ParentName =
10763           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10764       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10765     }
10766   }
10767 
10768   // Do not to emit variable if it is not marked as declare target.
10769   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10770       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10771           cast<VarDecl>(GD.getDecl()));
10772   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10773       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10774        HasRequiresUnifiedSharedMemory)) {
10775     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10776     return true;
10777   }
10778   return false;
10779 }
10780 
10781 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10782                                                    llvm::Constant *Addr) {
10783   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10784       !CGM.getLangOpts().OpenMPIsDevice)
10785     return;
10786 
10787   // If we have host/nohost variables, they do not need to be registered.
10788   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10789       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10790   if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
10791     return;
10792 
10793   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10794       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10795   if (!Res) {
10796     if (CGM.getLangOpts().OpenMPIsDevice) {
10797       // Register non-target variables being emitted in device code (debug info
10798       // may cause this).
10799       StringRef VarName = CGM.getMangledName(VD);
10800       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10801     }
10802     return;
10803   }
10804   // Register declare target variables.
10805   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10806   StringRef VarName;
10807   CharUnits VarSize;
10808   llvm::GlobalValue::LinkageTypes Linkage;
10809 
10810   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10811       !HasRequiresUnifiedSharedMemory) {
10812     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10813     VarName = CGM.getMangledName(VD);
10814     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10815       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10816       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10817     } else {
10818       VarSize = CharUnits::Zero();
10819     }
10820     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10821     // Temp solution to prevent optimizations of the internal variables.
10822     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10823       // Do not create a "ref-variable" if the original is not also available
10824       // on the host.
10825       if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10826         return;
10827       std::string RefName = getName({VarName, "ref"});
10828       if (!CGM.GetGlobalValue(RefName)) {
10829         llvm::Constant *AddrRef =
10830             getOrCreateInternalVariable(Addr->getType(), RefName);
10831         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10832         GVAddrRef->setConstant(/*Val=*/true);
10833         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10834         GVAddrRef->setInitializer(Addr);
10835         CGM.addCompilerUsedGlobal(GVAddrRef);
10836       }
10837     }
10838   } else {
10839     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10840             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10841              HasRequiresUnifiedSharedMemory)) &&
10842            "Declare target attribute must link or to with unified memory.");
10843     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10844       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10845     else
10846       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10847 
10848     if (CGM.getLangOpts().OpenMPIsDevice) {
10849       VarName = Addr->getName();
10850       Addr = nullptr;
10851     } else {
10852       VarName = getAddrOfDeclareTargetVar(VD).getName();
10853       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10854     }
10855     VarSize = CGM.getPointerSize();
10856     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10857   }
10858 
10859   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10860       VarName, Addr, VarSize, Flags, Linkage);
10861 }
10862 
10863 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10864   if (isa<FunctionDecl>(GD.getDecl()) ||
10865       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10866     return emitTargetFunctions(GD);
10867 
10868   return emitTargetGlobalVariable(GD);
10869 }
10870 
10871 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10872   for (const VarDecl *VD : DeferredGlobalVariables) {
10873     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10874         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10875     if (!Res)
10876       continue;
10877     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10878         !HasRequiresUnifiedSharedMemory) {
10879       CGM.EmitGlobal(VD);
10880     } else {
10881       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10882               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10883                HasRequiresUnifiedSharedMemory)) &&
10884              "Expected link clause or to clause with unified memory.");
10885       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10886     }
10887   }
10888 }
10889 
10890 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10891     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10892   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10893          " Expected target-based directive.");
10894 }
10895 
10896 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10897   for (const OMPClause *Clause : D->clauselists()) {
10898     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10899       HasRequiresUnifiedSharedMemory = true;
10900     } else if (const auto *AC =
10901                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10902       switch (AC->getAtomicDefaultMemOrderKind()) {
10903       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10904         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10905         break;
10906       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10907         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10908         break;
10909       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10910         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10911         break;
10912       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10913         break;
10914       }
10915     }
10916   }
10917 }
10918 
10919 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10920   return RequiresAtomicOrdering;
10921 }
10922 
10923 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10924                                                        LangAS &AS) {
10925   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10926     return false;
10927   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10928   switch(A->getAllocatorType()) {
10929   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10930   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10931   // Not supported, fallback to the default mem space.
10932   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10933   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10934   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10935   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10936   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10937   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10938   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10939     AS = LangAS::Default;
10940     return true;
10941   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10942     llvm_unreachable("Expected predefined allocator for the variables with the "
10943                      "static storage.");
10944   }
10945   return false;
10946 }
10947 
10948 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10949   return HasRequiresUnifiedSharedMemory;
10950 }
10951 
10952 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10953     CodeGenModule &CGM)
10954     : CGM(CGM) {
10955   if (CGM.getLangOpts().OpenMPIsDevice) {
10956     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10957     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10958   }
10959 }
10960 
10961 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10962   if (CGM.getLangOpts().OpenMPIsDevice)
10963     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10964 }
10965 
10966 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10967   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10968     return true;
10969 
10970   const auto *D = cast<FunctionDecl>(GD.getDecl());
10971   // Do not to emit function if it is marked as declare target as it was already
10972   // emitted.
10973   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10974     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10975       if (auto *F = dyn_cast_or_null<llvm::Function>(
10976               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10977         return !F->isDeclaration();
10978       return false;
10979     }
10980     return true;
10981   }
10982 
10983   return !AlreadyEmittedTargetDecls.insert(D).second;
10984 }
10985 
10986 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10987   // If we don't have entries or if we are emitting code for the device, we
10988   // don't need to do anything.
10989   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10990       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10991       (OffloadEntriesInfoManager.empty() &&
10992        !HasEmittedDeclareTargetRegion &&
10993        !HasEmittedTargetRegion))
10994     return nullptr;
10995 
10996   // Create and register the function that handles the requires directives.
10997   ASTContext &C = CGM.getContext();
10998 
10999   llvm::Function *RequiresRegFn;
11000   {
11001     CodeGenFunction CGF(CGM);
11002     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
11003     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
11004     std::string ReqName = getName({"omp_offloading", "requires_reg"});
11005     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
11006     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
11007     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
11008     // TODO: check for other requires clauses.
11009     // The requires directive takes effect only when a target region is
11010     // present in the compilation unit. Otherwise it is ignored and not
11011     // passed to the runtime. This avoids the runtime from throwing an error
11012     // for mismatching requires clauses across compilation units that don't
11013     // contain at least 1 target region.
11014     assert((HasEmittedTargetRegion ||
11015             HasEmittedDeclareTargetRegion ||
11016             !OffloadEntriesInfoManager.empty()) &&
11017            "Target or declare target region expected.");
11018     if (HasRequiresUnifiedSharedMemory)
11019       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
11020     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11021                             CGM.getModule(), OMPRTL___tgt_register_requires),
11022                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
11023     CGF.FinishFunction();
11024   }
11025   return RequiresRegFn;
11026 }
11027 
11028 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11029                                     const OMPExecutableDirective &D,
11030                                     SourceLocation Loc,
11031                                     llvm::Function *OutlinedFn,
11032                                     ArrayRef<llvm::Value *> CapturedVars) {
11033   if (!CGF.HaveInsertPoint())
11034     return;
11035 
11036   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11037   CodeGenFunction::RunCleanupsScope Scope(CGF);
11038 
11039   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11040   llvm::Value *Args[] = {
11041       RTLoc,
11042       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11043       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11044   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11045   RealArgs.append(std::begin(Args), std::end(Args));
11046   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11047 
11048   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11049       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11050   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11051 }
11052 
11053 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11054                                          const Expr *NumTeams,
11055                                          const Expr *ThreadLimit,
11056                                          SourceLocation Loc) {
11057   if (!CGF.HaveInsertPoint())
11058     return;
11059 
11060   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11061 
11062   llvm::Value *NumTeamsVal =
11063       NumTeams
11064           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11065                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11066           : CGF.Builder.getInt32(0);
11067 
11068   llvm::Value *ThreadLimitVal =
11069       ThreadLimit
11070           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11071                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11072           : CGF.Builder.getInt32(0);
11073 
11074   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11075   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11076                                      ThreadLimitVal};
11077   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11078                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11079                       PushNumTeamsArgs);
11080 }
11081 
11082 void CGOpenMPRuntime::emitTargetDataCalls(
11083     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11084     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11085   if (!CGF.HaveInsertPoint())
11086     return;
11087 
11088   // Action used to replace the default codegen action and turn privatization
11089   // off.
11090   PrePostActionTy NoPrivAction;
11091 
11092   // Generate the code for the opening of the data environment. Capture all the
11093   // arguments of the runtime call by reference because they are used in the
11094   // closing of the region.
11095   auto &&BeginThenGen = [this, &D, Device, &Info,
11096                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11097     // Fill up the arrays with all the mapped variables.
11098     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11099 
11100     // Get map clause information.
11101     MappableExprsHandler MEHandler(D, CGF);
11102     MEHandler.generateAllInfo(CombinedInfo);
11103 
11104     // Fill up the arrays and create the arguments.
11105     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11106                          /*IsNonContiguous=*/true);
11107 
11108     llvm::Value *BasePointersArrayArg = nullptr;
11109     llvm::Value *PointersArrayArg = nullptr;
11110     llvm::Value *SizesArrayArg = nullptr;
11111     llvm::Value *MapTypesArrayArg = nullptr;
11112     llvm::Value *MapNamesArrayArg = nullptr;
11113     llvm::Value *MappersArrayArg = nullptr;
11114     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11115                                  SizesArrayArg, MapTypesArrayArg,
11116                                  MapNamesArrayArg, MappersArrayArg, Info);
11117 
11118     // Emit device ID if any.
11119     llvm::Value *DeviceID = nullptr;
11120     if (Device) {
11121       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11122                                            CGF.Int64Ty, /*isSigned=*/true);
11123     } else {
11124       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11125     }
11126 
11127     // Emit the number of elements in the offloading arrays.
11128     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11129     //
11130     // Source location for the ident struct
11131     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11132 
11133     llvm::Value *OffloadingArgs[] = {RTLoc,
11134                                      DeviceID,
11135                                      PointerNum,
11136                                      BasePointersArrayArg,
11137                                      PointersArrayArg,
11138                                      SizesArrayArg,
11139                                      MapTypesArrayArg,
11140                                      MapNamesArrayArg,
11141                                      MappersArrayArg};
11142     CGF.EmitRuntimeCall(
11143         OMPBuilder.getOrCreateRuntimeFunction(
11144             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11145         OffloadingArgs);
11146 
11147     // If device pointer privatization is required, emit the body of the region
11148     // here. It will have to be duplicated: with and without privatization.
11149     if (!Info.CaptureDeviceAddrMap.empty())
11150       CodeGen(CGF);
11151   };
11152 
11153   // Generate code for the closing of the data region.
11154   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11155                                                 PrePostActionTy &) {
11156     assert(Info.isValid() && "Invalid data environment closing arguments.");
11157 
11158     llvm::Value *BasePointersArrayArg = nullptr;
11159     llvm::Value *PointersArrayArg = nullptr;
11160     llvm::Value *SizesArrayArg = nullptr;
11161     llvm::Value *MapTypesArrayArg = nullptr;
11162     llvm::Value *MapNamesArrayArg = nullptr;
11163     llvm::Value *MappersArrayArg = nullptr;
11164     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11165                                  SizesArrayArg, MapTypesArrayArg,
11166                                  MapNamesArrayArg, MappersArrayArg, Info,
11167                                  {/*ForEndCall=*/true});
11168 
11169     // Emit device ID if any.
11170     llvm::Value *DeviceID = nullptr;
11171     if (Device) {
11172       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11173                                            CGF.Int64Ty, /*isSigned=*/true);
11174     } else {
11175       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11176     }
11177 
11178     // Emit the number of elements in the offloading arrays.
11179     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11180 
11181     // Source location for the ident struct
11182     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11183 
11184     llvm::Value *OffloadingArgs[] = {RTLoc,
11185                                      DeviceID,
11186                                      PointerNum,
11187                                      BasePointersArrayArg,
11188                                      PointersArrayArg,
11189                                      SizesArrayArg,
11190                                      MapTypesArrayArg,
11191                                      MapNamesArrayArg,
11192                                      MappersArrayArg};
11193     CGF.EmitRuntimeCall(
11194         OMPBuilder.getOrCreateRuntimeFunction(
11195             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11196         OffloadingArgs);
11197   };
11198 
11199   // If we need device pointer privatization, we need to emit the body of the
11200   // region with no privatization in the 'else' branch of the conditional.
11201   // Otherwise, we don't have to do anything.
11202   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11203                                                          PrePostActionTy &) {
11204     if (!Info.CaptureDeviceAddrMap.empty()) {
11205       CodeGen.setAction(NoPrivAction);
11206       CodeGen(CGF);
11207     }
11208   };
11209 
11210   // We don't have to do anything to close the region if the if clause evaluates
11211   // to false.
11212   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11213 
11214   if (IfCond) {
11215     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11216   } else {
11217     RegionCodeGenTy RCG(BeginThenGen);
11218     RCG(CGF);
11219   }
11220 
11221   // If we don't require privatization of device pointers, we emit the body in
11222   // between the runtime calls. This avoids duplicating the body code.
11223   if (Info.CaptureDeviceAddrMap.empty()) {
11224     CodeGen.setAction(NoPrivAction);
11225     CodeGen(CGF);
11226   }
11227 
11228   if (IfCond) {
11229     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11230   } else {
11231     RegionCodeGenTy RCG(EndThenGen);
11232     RCG(CGF);
11233   }
11234 }
11235 
/// Emit the runtime call for a standalone target data directive
/// ('target enter data', 'target exit data' or 'target update').
///
/// \param CGF Function being emitted; nothing is done when it has no insert
/// point.
/// \param D The standalone directive. Any other directive kind is rejected by
/// the assertion below.
/// \param IfCond Optional 'if' clause condition. When present, the offloading
/// code is emitted only on the 'then' path; the 'else' path is empty.
/// \param Device Optional 'device' clause expression. OMP_DEVICEID_UNDEF is
/// passed to the runtime when it is absent.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // State shared between the two lambdas below: TargetThenGen fills these in
  // and then hands ThenGen over for emission, which reads them when building
  // the runtime call arguments.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the runtime call that performs the data movement for the
  // directive.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      // The device expression is converted to a signed 64-bit integer.
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Argument list shared by all of the *_mapper entry points selected below.
    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive. A 'nowait' clause selects the *_nowait_mapper variant.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // Every other directive kind is invalid here; the assertion at the top of
    // the function already restricts D to the three kinds handled above.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Builds the offloading arrays from the directive's clauses, publishes them
  // through InputInfo / MapTypesArray / MapNamesArray, and then emits ThenGen
  // either inside a task or inline.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    // A 'depend' or 'nowait' clause means the call is emitted through the
    // task-based path below instead of inline.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});
    // Publish the arrays for ThenGen; all of them are given pointer alignment.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an 'if' clause the whole sequence is guarded by the condition and
  // nothing is emitted on the 'else' path; otherwise it is emitted
  // unconditionally.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11414 
11415 namespace {
11416   /// Kind of parameter in a function with 'declare simd' directive.
11417   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
11418   /// Attribute set of the parameter.
11419   struct ParamAttrTy {
11420     ParamKindTy Kind = Vector;
11421     llvm::APSInt StrideOrArg;
11422     llvm::APSInt Alignment;
11423   };
11424 } // namespace
11425 
11426 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11427                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11428   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11429   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11430   // of that clause. The VLEN value must be power of 2.
11431   // In other case the notion of the function`s "characteristic data type" (CDT)
11432   // is used to compute the vector length.
11433   // CDT is defined in the following order:
11434   //   a) For non-void function, the CDT is the return type.
11435   //   b) If the function has any non-uniform, non-linear parameters, then the
11436   //   CDT is the type of the first such parameter.
11437   //   c) If the CDT determined by a) or b) above is struct, union, or class
11438   //   type which is pass-by-value (except for the type that maps to the
11439   //   built-in complex data type), the characteristic data type is int.
11440   //   d) If none of the above three cases is applicable, the CDT is int.
11441   // The VLEN is then determined based on the CDT and the size of vector
11442   // register of that ISA for which current vector version is generated. The
11443   // VLEN is computed using the formula below:
11444   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11445   // where vector register size specified in section 3.2.1 Registers and the
11446   // Stack Frame of original AMD64 ABI document.
11447   QualType RetType = FD->getReturnType();
11448   if (RetType.isNull())
11449     return 0;
11450   ASTContext &C = FD->getASTContext();
11451   QualType CDT;
11452   if (!RetType.isNull() && !RetType->isVoidType()) {
11453     CDT = RetType;
11454   } else {
11455     unsigned Offset = 0;
11456     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11457       if (ParamAttrs[Offset].Kind == Vector)
11458         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11459       ++Offset;
11460     }
11461     if (CDT.isNull()) {
11462       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11463         if (ParamAttrs[I + Offset].Kind == Vector) {
11464           CDT = FD->getParamDecl(I)->getType();
11465           break;
11466         }
11467       }
11468     }
11469   }
11470   if (CDT.isNull())
11471     CDT = C.IntTy;
11472   CDT = CDT->getCanonicalTypeUnqualified();
11473   if (CDT->isRecordType() || CDT->isUnionType())
11474     CDT = C.IntTy;
11475   return C.getTypeSize(CDT);
11476 }
11477 
11478 static void
11479 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11480                            const llvm::APSInt &VLENVal,
11481                            ArrayRef<ParamAttrTy> ParamAttrs,
11482                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11483   struct ISADataTy {
11484     char ISA;
11485     unsigned VecRegSize;
11486   };
11487   ISADataTy ISAData[] = {
11488       {
11489           'b', 128
11490       }, // SSE
11491       {
11492           'c', 256
11493       }, // AVX
11494       {
11495           'd', 256
11496       }, // AVX2
11497       {
11498           'e', 512
11499       }, // AVX512
11500   };
11501   llvm::SmallVector<char, 2> Masked;
11502   switch (State) {
11503   case OMPDeclareSimdDeclAttr::BS_Undefined:
11504     Masked.push_back('N');
11505     Masked.push_back('M');
11506     break;
11507   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11508     Masked.push_back('N');
11509     break;
11510   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11511     Masked.push_back('M');
11512     break;
11513   }
11514   for (char Mask : Masked) {
11515     for (const ISADataTy &Data : ISAData) {
11516       SmallString<256> Buffer;
11517       llvm::raw_svector_ostream Out(Buffer);
11518       Out << "_ZGV" << Data.ISA << Mask;
11519       if (!VLENVal) {
11520         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11521         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11522         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11523       } else {
11524         Out << VLENVal;
11525       }
11526       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11527         switch (ParamAttr.Kind){
11528         case LinearWithVarStride:
11529           Out << 's' << ParamAttr.StrideOrArg;
11530           break;
11531         case Linear:
11532           Out << 'l';
11533           if (ParamAttr.StrideOrArg != 1)
11534             Out << ParamAttr.StrideOrArg;
11535           break;
11536         case Uniform:
11537           Out << 'u';
11538           break;
11539         case Vector:
11540           Out << 'v';
11541           break;
11542         }
11543         if (!!ParamAttr.Alignment)
11544           Out << 'a' << ParamAttr.Alignment;
11545       }
11546       Out << '_' << Fn->getName();
11547       Fn->addFnAttr(Out.str());
11548     }
11549   }
11550 }
11551 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available
// at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11557 
11558 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11559 ///
11560 /// TODO: Need to implement the behavior for reference marked with a
11561 /// var or no linear modifiers (1.b in the section). For this, we
11562 /// need to extend ParamKindTy to support the linear modifiers.
11563 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11564   QT = QT.getCanonicalType();
11565 
11566   if (QT->isVoidType())
11567     return false;
11568 
11569   if (Kind == ParamKindTy::Uniform)
11570     return false;
11571 
11572   if (Kind == ParamKindTy::Linear)
11573     return false;
11574 
11575   // TODO: Handle linear references with modifiers
11576 
11577   if (Kind == ParamKindTy::LinearWithVarStride)
11578     return false;
11579 
11580   return true;
11581 }
11582 
11583 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11584 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11585   QT = QT.getCanonicalType();
11586   unsigned Size = C.getTypeSize(QT);
11587 
11588   // Only scalars and complex within 16 bytes wide set PVB to true.
11589   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11590     return false;
11591 
11592   if (QT->isFloatingType())
11593     return true;
11594 
11595   if (QT->isIntegerType())
11596     return true;
11597 
11598   if (QT->isPointerType())
11599     return true;
11600 
11601   // TODO: Add support for complex types (section 3.1.2, item 2).
11602 
11603   return false;
11604 }
11605 
11606 /// Computes the lane size (LS) of a return type or of an input parameter,
11607 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11608 /// TODO: Add support for references, section 3.2.1, item 1.
11609 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11610   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11611     QualType PTy = QT.getCanonicalType()->getPointeeType();
11612     if (getAArch64PBV(PTy, C))
11613       return C.getTypeSize(PTy);
11614   }
11615   if (getAArch64PBV(QT, C))
11616     return C.getTypeSize(QT);
11617 
11618   return C.getTypeSize(C.getUIntPtrType());
11619 }
11620 
11621 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11622 // signature of the scalar function, as defined in 3.2.2 of the
11623 // AAVFABI.
11624 static std::tuple<unsigned, unsigned, bool>
11625 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11626   QualType RetType = FD->getReturnType().getCanonicalType();
11627 
11628   ASTContext &C = FD->getASTContext();
11629 
11630   bool OutputBecomesInput = false;
11631 
11632   llvm::SmallVector<unsigned, 8> Sizes;
11633   if (!RetType->isVoidType()) {
11634     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11635     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11636       OutputBecomesInput = true;
11637   }
11638   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11639     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11640     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11641   }
11642 
11643   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11644   // The LS of a function parameter / return value can only be a power
11645   // of 2, starting from 8 bits, up to 128.
11646   assert(std::all_of(Sizes.begin(), Sizes.end(),
11647                      [](unsigned Size) {
11648                        return Size == 8 || Size == 16 || Size == 32 ||
11649                               Size == 64 || Size == 128;
11650                      }) &&
11651          "Invalid size");
11652 
11653   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11654                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11655                          OutputBecomesInput);
11656 }
11657 
11658 /// Mangle the parameter part of the vector function name according to
11659 /// their OpenMP classification. The mangling function is defined in
11660 /// section 3.5 of the AAVFABI.
11661 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11662   SmallString<256> Buffer;
11663   llvm::raw_svector_ostream Out(Buffer);
11664   for (const auto &ParamAttr : ParamAttrs) {
11665     switch (ParamAttr.Kind) {
11666     case LinearWithVarStride:
11667       Out << "ls" << ParamAttr.StrideOrArg;
11668       break;
11669     case Linear:
11670       Out << 'l';
11671       // Don't print the step value if it is not present or if it is
11672       // equal to 1.
11673       if (ParamAttr.StrideOrArg != 1)
11674         Out << ParamAttr.StrideOrArg;
11675       break;
11676     case Uniform:
11677       Out << 'u';
11678       break;
11679     case Vector:
11680       Out << 'v';
11681       break;
11682     }
11683 
11684     if (!!ParamAttr.Alignment)
11685       Out << 'a' << ParamAttr.Alignment;
11686   }
11687 
11688   return std::string(Out.str());
11689 }
11690 
11691 // Function used to add the attribute. The parameter `VLEN` is
11692 // templated to allow the use of "x" when targeting scalable functions
11693 // for SVE.
11694 template <typename T>
11695 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11696                                  char ISA, StringRef ParSeq,
11697                                  StringRef MangledName, bool OutputBecomesInput,
11698                                  llvm::Function *Fn) {
11699   SmallString<256> Buffer;
11700   llvm::raw_svector_ostream Out(Buffer);
11701   Out << Prefix << ISA << LMask << VLEN;
11702   if (OutputBecomesInput)
11703     Out << "v";
11704   Out << ParSeq << "_" << MangledName;
11705   Fn->addFnAttr(Out.str());
11706 }
11707 
11708 // Helper function to generate the Advanced SIMD names depending on
11709 // the value of the NDS when simdlen is not present.
11710 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11711                                       StringRef Prefix, char ISA,
11712                                       StringRef ParSeq, StringRef MangledName,
11713                                       bool OutputBecomesInput,
11714                                       llvm::Function *Fn) {
11715   switch (NDS) {
11716   case 8:
11717     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11718                          OutputBecomesInput, Fn);
11719     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11720                          OutputBecomesInput, Fn);
11721     break;
11722   case 16:
11723     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11724                          OutputBecomesInput, Fn);
11725     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11726                          OutputBecomesInput, Fn);
11727     break;
11728   case 32:
11729     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11730                          OutputBecomesInput, Fn);
11731     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11732                          OutputBecomesInput, Fn);
11733     break;
11734   case 64:
11735   case 128:
11736     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11737                          OutputBecomesInput, Fn);
11738     break;
11739   default:
11740     llvm_unreachable("Scalar type is too wide.");
11741   }
11742 }
11743 
11744 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11745 static void emitAArch64DeclareSimdFunction(
11746     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11747     ArrayRef<ParamAttrTy> ParamAttrs,
11748     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11749     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11750 
11751   // Get basic data for building the vector signature.
11752   const auto Data = getNDSWDS(FD, ParamAttrs);
11753   const unsigned NDS = std::get<0>(Data);
11754   const unsigned WDS = std::get<1>(Data);
11755   const bool OutputBecomesInput = std::get<2>(Data);
11756 
11757   // Check the values provided via `simdlen` by the user.
11758   // 1. A `simdlen(1)` doesn't produce vector signatures,
11759   if (UserVLEN == 1) {
11760     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11761         DiagnosticsEngine::Warning,
11762         "The clause simdlen(1) has no effect when targeting aarch64.");
11763     CGM.getDiags().Report(SLoc, DiagID);
11764     return;
11765   }
11766 
11767   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11768   // Advanced SIMD output.
11769   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11770     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11771         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11772                                     "power of 2 when targeting Advanced SIMD.");
11773     CGM.getDiags().Report(SLoc, DiagID);
11774     return;
11775   }
11776 
11777   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11778   // limits.
11779   if (ISA == 's' && UserVLEN != 0) {
11780     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11781       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11782           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11783                                       "lanes in the architectural constraints "
11784                                       "for SVE (min is 128-bit, max is "
11785                                       "2048-bit, by steps of 128-bit)");
11786       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11787       return;
11788     }
11789   }
11790 
11791   // Sort out parameter sequence.
11792   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11793   StringRef Prefix = "_ZGV";
11794   // Generate simdlen from user input (if any).
11795   if (UserVLEN) {
11796     if (ISA == 's') {
11797       // SVE generates only a masked function.
11798       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11799                            OutputBecomesInput, Fn);
11800     } else {
11801       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11802       // Advanced SIMD generates one or two functions, depending on
11803       // the `[not]inbranch` clause.
11804       switch (State) {
11805       case OMPDeclareSimdDeclAttr::BS_Undefined:
11806         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11807                              OutputBecomesInput, Fn);
11808         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11809                              OutputBecomesInput, Fn);
11810         break;
11811       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11812         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11813                              OutputBecomesInput, Fn);
11814         break;
11815       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11816         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11817                              OutputBecomesInput, Fn);
11818         break;
11819       }
11820     }
11821   } else {
11822     // If no user simdlen is provided, follow the AAVFABI rules for
11823     // generating the vector length.
11824     if (ISA == 's') {
11825       // SVE, section 3.4.1, item 1.
11826       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11827                            OutputBecomesInput, Fn);
11828     } else {
11829       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11830       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11831       // two vector names depending on the use of the clause
11832       // `[not]inbranch`.
11833       switch (State) {
11834       case OMPDeclareSimdDeclAttr::BS_Undefined:
11835         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11836                                   OutputBecomesInput, Fn);
11837         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11838                                   OutputBecomesInput, Fn);
11839         break;
11840       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11841         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11842                                   OutputBecomesInput, Fn);
11843         break;
11844       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11845         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11846                                   OutputBecomesInput, Fn);
11847         break;
11848       }
11849     }
11850   }
11851 }
11852 
11853 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11854                                               llvm::Function *Fn) {
11855   ASTContext &C = CGM.getContext();
11856   FD = FD->getMostRecentDecl();
11857   // Map params to their positions in function decl.
11858   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11859   if (isa<CXXMethodDecl>(FD))
11860     ParamPositions.try_emplace(FD, 0);
11861   unsigned ParamPos = ParamPositions.size();
11862   for (const ParmVarDecl *P : FD->parameters()) {
11863     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11864     ++ParamPos;
11865   }
11866   while (FD) {
11867     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11868       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11869       // Mark uniform parameters.
11870       for (const Expr *E : Attr->uniforms()) {
11871         E = E->IgnoreParenImpCasts();
11872         unsigned Pos;
11873         if (isa<CXXThisExpr>(E)) {
11874           Pos = ParamPositions[FD];
11875         } else {
11876           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11877                                 ->getCanonicalDecl();
11878           Pos = ParamPositions[PVD];
11879         }
11880         ParamAttrs[Pos].Kind = Uniform;
11881       }
11882       // Get alignment info.
11883       auto NI = Attr->alignments_begin();
11884       for (const Expr *E : Attr->aligneds()) {
11885         E = E->IgnoreParenImpCasts();
11886         unsigned Pos;
11887         QualType ParmTy;
11888         if (isa<CXXThisExpr>(E)) {
11889           Pos = ParamPositions[FD];
11890           ParmTy = E->getType();
11891         } else {
11892           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11893                                 ->getCanonicalDecl();
11894           Pos = ParamPositions[PVD];
11895           ParmTy = PVD->getType();
11896         }
11897         ParamAttrs[Pos].Alignment =
11898             (*NI)
11899                 ? (*NI)->EvaluateKnownConstInt(C)
11900                 : llvm::APSInt::getUnsigned(
11901                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11902                           .getQuantity());
11903         ++NI;
11904       }
11905       // Mark linear parameters.
11906       auto SI = Attr->steps_begin();
11907       auto MI = Attr->modifiers_begin();
11908       for (const Expr *E : Attr->linears()) {
11909         E = E->IgnoreParenImpCasts();
11910         unsigned Pos;
11911         // Rescaling factor needed to compute the linear parameter
11912         // value in the mangled name.
11913         unsigned PtrRescalingFactor = 1;
11914         if (isa<CXXThisExpr>(E)) {
11915           Pos = ParamPositions[FD];
11916         } else {
11917           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11918                                 ->getCanonicalDecl();
11919           Pos = ParamPositions[PVD];
11920           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11921             PtrRescalingFactor = CGM.getContext()
11922                                      .getTypeSizeInChars(P->getPointeeType())
11923                                      .getQuantity();
11924         }
11925         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11926         ParamAttr.Kind = Linear;
11927         // Assuming a stride of 1, for `linear` without modifiers.
11928         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11929         if (*SI) {
11930           Expr::EvalResult Result;
11931           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11932             if (const auto *DRE =
11933                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11934               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11935                 ParamAttr.Kind = LinearWithVarStride;
11936                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11937                     ParamPositions[StridePVD->getCanonicalDecl()]);
11938               }
11939             }
11940           } else {
11941             ParamAttr.StrideOrArg = Result.Val.getInt();
11942           }
11943         }
11944         // If we are using a linear clause on a pointer, we need to
11945         // rescale the value of linear_step with the byte size of the
11946         // pointee type.
11947         if (Linear == ParamAttr.Kind)
11948           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11949         ++SI;
11950         ++MI;
11951       }
11952       llvm::APSInt VLENVal;
11953       SourceLocation ExprLoc;
11954       const Expr *VLENExpr = Attr->getSimdlen();
11955       if (VLENExpr) {
11956         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11957         ExprLoc = VLENExpr->getExprLoc();
11958       }
11959       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11960       if (CGM.getTriple().isX86()) {
11961         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11962       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11963         unsigned VLEN = VLENVal.getExtValue();
11964         StringRef MangledName = Fn->getName();
11965         if (CGM.getTarget().hasFeature("sve"))
11966           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11967                                          MangledName, 's', 128, Fn, ExprLoc);
11968         if (CGM.getTarget().hasFeature("neon"))
11969           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11970                                          MangledName, 'n', 128, Fn, ExprLoc);
11971       }
11972     }
11973     FD = FD->getPreviousDecl();
11974   }
11975 }
11976 
11977 namespace {
11978 /// Cleanup action for doacross support.
11979 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11980 public:
11981   static const int DoacrossFinArgs = 2;
11982 
11983 private:
11984   llvm::FunctionCallee RTLFn;
11985   llvm::Value *Args[DoacrossFinArgs];
11986 
11987 public:
11988   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11989                     ArrayRef<llvm::Value *> CallArgs)
11990       : RTLFn(RTLFn) {
11991     assert(CallArgs.size() == DoacrossFinArgs);
11992     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11993   }
11994   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11995     if (!CGF.HaveInsertPoint())
11996       return;
11997     CGF.EmitRuntimeCall(RTLFn, Args);
11998   }
11999 };
12000 } // namespace
12001 
12002 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12003                                        const OMPLoopDirective &D,
12004                                        ArrayRef<Expr *> NumIterations) {
12005   if (!CGF.HaveInsertPoint())
12006     return;
12007 
12008   ASTContext &C = CGM.getContext();
12009   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
12010   RecordDecl *RD;
12011   if (KmpDimTy.isNull()) {
12012     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
12013     //  kmp_int64 lo; // lower
12014     //  kmp_int64 up; // upper
12015     //  kmp_int64 st; // stride
12016     // };
12017     RD = C.buildImplicitRecord("kmp_dim");
12018     RD->startDefinition();
12019     addFieldToRecordDecl(C, RD, Int64Ty);
12020     addFieldToRecordDecl(C, RD, Int64Ty);
12021     addFieldToRecordDecl(C, RD, Int64Ty);
12022     RD->completeDefinition();
12023     KmpDimTy = C.getRecordType(RD);
12024   } else {
12025     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
12026   }
12027   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12028   QualType ArrayTy =
12029       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
12030 
12031   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
12032   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
12033   enum { LowerFD = 0, UpperFD, StrideFD };
12034   // Fill dims with data.
12035   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
12036     LValue DimsLVal = CGF.MakeAddrLValue(
12037         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
12038     // dims.upper = num_iterations;
12039     LValue UpperLVal = CGF.EmitLValueForField(
12040         DimsLVal, *std::next(RD->field_begin(), UpperFD));
12041     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
12042         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
12043         Int64Ty, NumIterations[I]->getExprLoc());
12044     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
12045     // dims.stride = 1;
12046     LValue StrideLVal = CGF.EmitLValueForField(
12047         DimsLVal, *std::next(RD->field_begin(), StrideFD));
12048     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
12049                           StrideLVal);
12050   }
12051 
12052   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
12053   // kmp_int32 num_dims, struct kmp_dim * dims);
12054   llvm::Value *Args[] = {
12055       emitUpdateLocation(CGF, D.getBeginLoc()),
12056       getThreadID(CGF, D.getBeginLoc()),
12057       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
12058       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12059           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
12060           CGM.VoidPtrTy)};
12061 
12062   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12063       CGM.getModule(), OMPRTL___kmpc_doacross_init);
12064   CGF.EmitRuntimeCall(RTLFn, Args);
12065   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
12066       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
12067   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12068       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
12069   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
12070                                              llvm::makeArrayRef(FiniArgs));
12071 }
12072 
12073 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12074                                           const OMPDependClause *C) {
12075   QualType Int64Ty =
12076       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12077   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12078   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12079       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12080   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12081   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12082     const Expr *CounterVal = C->getLoopData(I);
12083     assert(CounterVal);
12084     llvm::Value *CntVal = CGF.EmitScalarConversion(
12085         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12086         CounterVal->getExprLoc());
12087     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12088                           /*Volatile=*/false, Int64Ty);
12089   }
12090   llvm::Value *Args[] = {
12091       emitUpdateLocation(CGF, C->getBeginLoc()),
12092       getThreadID(CGF, C->getBeginLoc()),
12093       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12094   llvm::FunctionCallee RTLFn;
12095   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12096     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12097                                                   OMPRTL___kmpc_doacross_post);
12098   } else {
12099     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12100     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12101                                                   OMPRTL___kmpc_doacross_wait);
12102   }
12103   CGF.EmitRuntimeCall(RTLFn, Args);
12104 }
12105 
12106 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12107                                llvm::FunctionCallee Callee,
12108                                ArrayRef<llvm::Value *> Args) const {
12109   assert(Loc.isValid() && "Outlined function call location must be valid.");
12110   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12111 
12112   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12113     if (Fn->doesNotThrow()) {
12114       CGF.EmitNounwindRuntimeCall(Fn, Args);
12115       return;
12116     }
12117   }
12118   CGF.EmitRuntimeCall(Callee, Args);
12119 }
12120 
/// Emits a call to the outlined function \p OutlinedFn with arguments \p Args
/// at location \p Loc. This implementation forwards everything unchanged to
/// emitCall.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12126 
12127 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12128   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12129     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12130       HasEmittedDeclareTargetRegion = true;
12131 }
12132 
/// Returns the address of the local variable \p NativeParam in \p CGF.
/// \p TargetParam is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12138 
/// Returns the address to use for local variable \p VD, emitting OpenMP
/// allocator calls when needed.
///
/// - A null \p VD yields Address::invalid().
/// - If the canonical declaration carries OMPAllocateDeclAttr and
///   isAllocatableDecl(VD) holds, a __kmpc_alloc call is emitted, a cleanup
///   calling __kmpc_free is pushed, and the resulting address is returned.
/// - Otherwise the address recorded for \p VD in the untied-task map of the
///   current function is returned; it is Address::invalid() when no entry
///   exists.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  // Look up the pair of addresses registered for VD in the untied-task data
  // associated with the function currently being emitted, if any.
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    // Compute the allocation size, rounded up to a multiple of the
    // declaration's alignment.
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // The size is only known at run time, so the rounding is emitted as IR.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // The size is a compile-time constant; round it up here.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // Emit the __kmpc_alloc(thread id, size, allocator) call.
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    // Cast the returned pointer to a pointer to the variable's type.
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For a variable registered in the untied-task data, also store the
    // allocated pointer into the first registered address.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      // Runtime function invoked by the cleanup.
      llvm::FunctionCallee RTLFn;
      // Raw encoding of the source location used to obtain the thread id.
      SourceLocation::UIntTy LocEncoding;
      // Address passed as the second argument of the runtime call.
      Address Addr;
      // Allocator expression, re-evaluated when the cleanup is emitted.
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      // Emits RTLFn(thread id, address as void*, allocator as void*), unless
      // there is no insertion point.
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is an enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // Prefer the second registered untied-task address when present;
    // otherwise use the freshly allocated pointer with the declaration's
    // alignment.
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    // For untied-task variables, let the enclosing OpenMP region (if any)
    // emit its untied switch after the allocation.
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12242 
12243 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12244                                              const VarDecl *VD) const {
12245   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12246   if (It == FunctionToUntiedTaskStackMap.end())
12247     return false;
12248   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12249 }
12250 
12251 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12252     CodeGenModule &CGM, const OMPLoopDirective &S)
12253     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12254   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12255   if (!NeedToPush)
12256     return;
12257   NontemporalDeclsSet &DS =
12258       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12259   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12260     for (const Stmt *Ref : C->private_refs()) {
12261       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12262       const ValueDecl *VD;
12263       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12264         VD = DRE->getDecl();
12265       } else {
12266         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12267         assert((ME->isImplicitCXXThis() ||
12268                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12269                "Expected member of current class.");
12270         VD = ME->getMemberDecl();
12271       }
12272       DS.insert(VD);
12273     }
12274   }
12275 }
12276 
12277 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12278   if (!NeedToPush)
12279     return;
12280   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12281 }
12282 
12283 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12284     CodeGenFunction &CGF,
12285     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12286                           std::pair<Address, Address>> &LocalVars)
12287     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12288   if (!NeedToPush)
12289     return;
12290   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12291       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12292   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12293 }
12294 
12295 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12296   if (!NeedToPush)
12297     return;
12298   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12299 }
12300 
12301 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12302   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12303 
12304   return llvm::any_of(
12305       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12306       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
12307 }
12308 
12309 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12310     const OMPExecutableDirective &S,
12311     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12312     const {
12313   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12314   // Vars in target/task regions must be excluded completely.
12315   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12316       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12317     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12318     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12319     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12320     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12321       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12322         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12323     }
12324   }
12325   // Exclude vars in private clauses.
12326   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12327     for (const Expr *Ref : C->varlists()) {
12328       if (!Ref->getType()->isScalarType())
12329         continue;
12330       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12331       if (!DRE)
12332         continue;
12333       NeedToCheckForLPCs.insert(DRE->getDecl());
12334     }
12335   }
12336   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12337     for (const Expr *Ref : C->varlists()) {
12338       if (!Ref->getType()->isScalarType())
12339         continue;
12340       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12341       if (!DRE)
12342         continue;
12343       NeedToCheckForLPCs.insert(DRE->getDecl());
12344     }
12345   }
12346   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12347     for (const Expr *Ref : C->varlists()) {
12348       if (!Ref->getType()->isScalarType())
12349         continue;
12350       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12351       if (!DRE)
12352         continue;
12353       NeedToCheckForLPCs.insert(DRE->getDecl());
12354     }
12355   }
12356   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12357     for (const Expr *Ref : C->varlists()) {
12358       if (!Ref->getType()->isScalarType())
12359         continue;
12360       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12361       if (!DRE)
12362         continue;
12363       NeedToCheckForLPCs.insert(DRE->getDecl());
12364     }
12365   }
12366   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12367     for (const Expr *Ref : C->varlists()) {
12368       if (!Ref->getType()->isScalarType())
12369         continue;
12370       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12371       if (!DRE)
12372         continue;
12373       NeedToCheckForLPCs.insert(DRE->getDecl());
12374     }
12375   }
12376   for (const Decl *VD : NeedToCheckForLPCs) {
12377     for (const LastprivateConditionalData &Data :
12378          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12379       if (Data.DeclToUniqueName.count(VD) > 0) {
12380         if (!Data.Disabled)
12381           NeedToAddForLPCsAsDisabled.insert(VD);
12382         break;
12383       }
12384     }
12385   }
12386 }
12387 
12388 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12389     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12390     : CGM(CGF.CGM),
12391       Action((CGM.getLangOpts().OpenMP >= 50 &&
12392               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12393                            [](const OMPLastprivateClause *C) {
12394                              return C->getKind() ==
12395                                     OMPC_LASTPRIVATE_conditional;
12396                            }))
12397                  ? ActionToDo::PushAsLastprivateConditional
12398                  : ActionToDo::DoNotPush) {
12399   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12400   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12401     return;
12402   assert(Action == ActionToDo::PushAsLastprivateConditional &&
12403          "Expected a push action.");
12404   LastprivateConditionalData &Data =
12405       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12406   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12407     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12408       continue;
12409 
12410     for (const Expr *Ref : C->varlists()) {
12411       Data.DeclToUniqueName.insert(std::make_pair(
12412           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12413           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12414     }
12415   }
12416   Data.IVLVal = IVLVal;
12417   Data.Fn = CGF.CurFn;
12418 }
12419 
12420 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12421     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12422     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12423   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12424   if (CGM.getLangOpts().OpenMP < 50)
12425     return;
12426   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12427   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12428   if (!NeedToAddForLPCsAsDisabled.empty()) {
12429     Action = ActionToDo::DisableLastprivateConditional;
12430     LastprivateConditionalData &Data =
12431         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12432     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12433       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12434     Data.Fn = CGF.CurFn;
12435     Data.Disabled = true;
12436   }
12437 }
12438 
/// Creates the RAII object through the two-argument constructor
/// (CodeGenFunction, directive), i.e. the form that may push a 'disabled'
/// entry for directive \p S.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12444 
12445 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12446   if (CGM.getLangOpts().OpenMP < 50)
12447     return;
12448   if (Action == ActionToDo::DisableLastprivateConditional) {
12449     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12450            "Expected list of disabled private vars.");
12451     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12452   }
12453   if (Action == ActionToDo::PushAsLastprivateConditional) {
12454     assert(
12455         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12456         "Expected list of lastprivate conditional vars.");
12457     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12458   }
12459 }
12460 
12461 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12462                                                         const VarDecl *VD) {
12463   ASTContext &C = CGM.getContext();
12464   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12465   if (I == LastprivateConditionalToTypes.end())
12466     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12467   QualType NewType;
12468   const FieldDecl *VDField;
12469   const FieldDecl *FiredField;
12470   LValue BaseLVal;
12471   auto VI = I->getSecond().find(VD);
12472   if (VI == I->getSecond().end()) {
12473     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12474     RD->startDefinition();
12475     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12476     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12477     RD->completeDefinition();
12478     NewType = C.getRecordType(RD);
12479     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12480     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12481     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12482   } else {
12483     NewType = std::get<0>(VI->getSecond());
12484     VDField = std::get<1>(VI->getSecond());
12485     FiredField = std::get<2>(VI->getSecond());
12486     BaseLVal = std::get<3>(VI->getSecond());
12487   }
12488   LValue FiredLVal =
12489       CGF.EmitLValueForField(BaseLVal, FiredField);
12490   CGF.EmitStoreOfScalar(
12491       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12492       FiredLVal);
12493   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12494 }
12495 
12496 namespace {
12497 /// Checks if the lastprivate conditional variable is referenced in LHS.
12498 class LastprivateConditionalRefChecker final
12499     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12500   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12501   const Expr *FoundE = nullptr;
12502   const Decl *FoundD = nullptr;
12503   StringRef UniqueDeclName;
12504   LValue IVLVal;
12505   llvm::Function *FoundFn = nullptr;
12506   SourceLocation Loc;
12507 
12508 public:
12509   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12510     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12511          llvm::reverse(LPM)) {
12512       auto It = D.DeclToUniqueName.find(E->getDecl());
12513       if (It == D.DeclToUniqueName.end())
12514         continue;
12515       if (D.Disabled)
12516         return false;
12517       FoundE = E;
12518       FoundD = E->getDecl()->getCanonicalDecl();
12519       UniqueDeclName = It->second;
12520       IVLVal = D.IVLVal;
12521       FoundFn = D.Fn;
12522       break;
12523     }
12524     return FoundE == E;
12525   }
12526   bool VisitMemberExpr(const MemberExpr *E) {
12527     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12528       return false;
12529     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12530          llvm::reverse(LPM)) {
12531       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12532       if (It == D.DeclToUniqueName.end())
12533         continue;
12534       if (D.Disabled)
12535         return false;
12536       FoundE = E;
12537       FoundD = E->getMemberDecl()->getCanonicalDecl();
12538       UniqueDeclName = It->second;
12539       IVLVal = D.IVLVal;
12540       FoundFn = D.Fn;
12541       break;
12542     }
12543     return FoundE == E;
12544   }
12545   bool VisitStmt(const Stmt *S) {
12546     for (const Stmt *Child : S->children()) {
12547       if (!Child)
12548         continue;
12549       if (const auto *E = dyn_cast<Expr>(Child))
12550         if (!E->isGLValue())
12551           continue;
12552       if (Visit(Child))
12553         return true;
12554     }
12555     return false;
12556   }
12557   explicit LastprivateConditionalRefChecker(
12558       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12559       : LPM(LPM) {}
12560   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12561   getFoundData() const {
12562     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12563   }
12564 };
12565 } // namespace
12566 
/// Emits the update of the global "last value" of a lastprivate conditional
/// variable.
///
/// Two internal variables are obtained through getOrCreateInternalVariable:
/// one named after \p UniqueDeclName holding the last value, and one with an
/// "iv" suffix holding the loop counter at which it was written. The emitted
/// code stores the current counter (loaded from \p IVLVal) and the private
/// value (loaded from \p LVal) into them when the stored counter is less than
/// or equal to the current one.
///
/// \param IVLVal lvalue of the loop counter.
/// \param UniqueDeclName base name of the internal variables; also passed as
///        the name of the critical region.
/// \param LVal lvalue of the private copy.
/// \param Loc source location used for the emitted loads and the critical
///        region.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  // Give the internal variable the alignment of the loop counter lvalue.
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  // Give the internal variable the alignment of the private lvalue.
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  // Note: the counter is loaded here, before the region body is emitted.
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    // The comparison is signed or unsigned depending on the counter type.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    // Only scalar and complex values can be copied; aggregates are rejected.
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    // NOTE(review): the object returned by CreateEmpty is discarded in this
    // statement and the branch above has already been emitted; confirm this
    // call still has the intended effect.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    // Otherwise wrap the update in a critical region named UniqueDeclName.
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12653 
12654 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12655                                                          const Expr *LHS) {
12656   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12657     return;
12658   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12659   if (!Checker.Visit(LHS))
12660     return;
12661   const Expr *FoundE;
12662   const Decl *FoundD;
12663   StringRef UniqueDeclName;
12664   LValue IVLVal;
12665   llvm::Function *FoundFn;
12666   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12667       Checker.getFoundData();
12668   if (FoundFn != CGF.CurFn) {
12669     // Special codegen for inner parallel regions.
12670     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12671     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12672     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12673            "Lastprivate conditional is not found in outer region.");
12674     QualType StructTy = std::get<0>(It->getSecond());
12675     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12676     LValue PrivLVal = CGF.EmitLValue(FoundE);
12677     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12678         PrivLVal.getAddress(CGF),
12679         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12680     LValue BaseLVal =
12681         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12682     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12683     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12684                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12685                         FiredLVal, llvm::AtomicOrdering::Unordered,
12686                         /*IsVolatile=*/true, /*isInit=*/false);
12687     return;
12688   }
12689 
12690   // Private address of the lastprivate conditional in the current context.
12691   // priv_a
12692   LValue LVal = CGF.EmitLValue(FoundE);
12693   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12694                                    FoundE->getExprLoc());
12695 }
12696 
12697 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12698     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12699     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12700   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12701     return;
12702   auto Range = llvm::reverse(LastprivateConditionalStack);
12703   auto It = llvm::find_if(
12704       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12705   if (It == Range.end() || It->Fn != CGF.CurFn)
12706     return;
12707   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12708   assert(LPCI != LastprivateConditionalToTypes.end() &&
12709          "Lastprivates must be registered already.");
12710   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12711   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12712   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12713   for (const auto &Pair : It->DeclToUniqueName) {
12714     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12715     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12716       continue;
12717     auto I = LPCI->getSecond().find(Pair.first);
12718     assert(I != LPCI->getSecond().end() &&
12719            "Lastprivate must be rehistered already.");
12720     // bool Cmp = priv_a.Fired != 0;
12721     LValue BaseLVal = std::get<3>(I->getSecond());
12722     LValue FiredLVal =
12723         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12724     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12725     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12726     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12727     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12728     // if (Cmp) {
12729     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12730     CGF.EmitBlock(ThenBB);
12731     Address Addr = CGF.GetAddrOfLocalVar(VD);
12732     LValue LVal;
12733     if (VD->getType()->isReferenceType())
12734       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12735                                            AlignmentSource::Decl);
12736     else
12737       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12738                                 AlignmentSource::Decl);
12739     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12740                                      D.getBeginLoc());
12741     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12742     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12743     // }
12744   }
12745 }
12746 
12747 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12748     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12749     SourceLocation Loc) {
12750   if (CGF.getLangOpts().OpenMP < 50)
12751     return;
12752   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12753   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12754          "Unknown lastprivate conditional variable.");
12755   StringRef UniqueName = It->second;
12756   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12757   // The variable was not updated in the region - exit.
12758   if (!GV)
12759     return;
12760   LValue LPLVal = CGF.MakeAddrLValue(
12761       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12762   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12763   CGF.EmitStoreOfScalar(Res, PrivLVal);
12764 }
12765 
12766 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
12767     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12768     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12769   llvm_unreachable("Not supported in SIMD-only mode");
12770 }
12771 
12772 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
12773     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12774     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12775   llvm_unreachable("Not supported in SIMD-only mode");
12776 }
12777 
12778 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12779     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12780     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12781     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12782     bool Tied, unsigned &NumberOfParts) {
12783   llvm_unreachable("Not supported in SIMD-only mode");
12784 }
12785 
12786 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
12787                                            SourceLocation Loc,
12788                                            llvm::Function *OutlinedFn,
12789                                            ArrayRef<llvm::Value *> CapturedVars,
12790                                            const Expr *IfCond) {
12791   llvm_unreachable("Not supported in SIMD-only mode");
12792 }
12793 
12794 void CGOpenMPSIMDRuntime::emitCriticalRegion(
12795     CodeGenFunction &CGF, StringRef CriticalName,
12796     const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12797     const Expr *Hint) {
12798   llvm_unreachable("Not supported in SIMD-only mode");
12799 }
12800 
12801 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12802                                            const RegionCodeGenTy &MasterOpGen,
12803                                            SourceLocation Loc) {
12804   llvm_unreachable("Not supported in SIMD-only mode");
12805 }
12806 
12807 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12808                                            const RegionCodeGenTy &MasterOpGen,
12809                                            SourceLocation Loc,
12810                                            const Expr *Filter) {
12811   llvm_unreachable("Not supported in SIMD-only mode");
12812 }
12813 
12814 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12815                                             SourceLocation Loc) {
12816   llvm_unreachable("Not supported in SIMD-only mode");
12817 }
12818 
12819 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12820     CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12821     SourceLocation Loc) {
12822   llvm_unreachable("Not supported in SIMD-only mode");
12823 }
12824 
12825 void CGOpenMPSIMDRuntime::emitSingleRegion(
12826     CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12827     SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12828     ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12829     ArrayRef<const Expr *> AssignmentOps) {
12830   llvm_unreachable("Not supported in SIMD-only mode");
12831 }
12832 
12833 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12834                                             const RegionCodeGenTy &OrderedOpGen,
12835                                             SourceLocation Loc,
12836                                             bool IsThreads) {
12837   llvm_unreachable("Not supported in SIMD-only mode");
12838 }
12839 
12840 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12841                                           SourceLocation Loc,
12842                                           OpenMPDirectiveKind Kind,
12843                                           bool EmitChecks,
12844                                           bool ForceSimpleCall) {
12845   llvm_unreachable("Not supported in SIMD-only mode");
12846 }
12847 
12848 void CGOpenMPSIMDRuntime::emitForDispatchInit(
12849     CodeGenFunction &CGF, SourceLocation Loc,
12850     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12851     bool Ordered, const DispatchRTInput &DispatchValues) {
12852   llvm_unreachable("Not supported in SIMD-only mode");
12853 }
12854 
12855 void CGOpenMPSIMDRuntime::emitForStaticInit(
12856     CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12857     const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12858   llvm_unreachable("Not supported in SIMD-only mode");
12859 }
12860 
12861 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12862     CodeGenFunction &CGF, SourceLocation Loc,
12863     OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12864   llvm_unreachable("Not supported in SIMD-only mode");
12865 }
12866 
12867 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12868                                                      SourceLocation Loc,
12869                                                      unsigned IVSize,
12870                                                      bool IVSigned) {
12871   llvm_unreachable("Not supported in SIMD-only mode");
12872 }
12873 
12874 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12875                                               SourceLocation Loc,
12876                                               OpenMPDirectiveKind DKind) {
12877   llvm_unreachable("Not supported in SIMD-only mode");
12878 }
12879 
12880 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12881                                               SourceLocation Loc,
12882                                               unsigned IVSize, bool IVSigned,
12883                                               Address IL, Address LB,
12884                                               Address UB, Address ST) {
12885   llvm_unreachable("Not supported in SIMD-only mode");
12886 }
12887 
12888 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12889                                                llvm::Value *NumThreads,
12890                                                SourceLocation Loc) {
12891   llvm_unreachable("Not supported in SIMD-only mode");
12892 }
12893 
12894 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12895                                              ProcBindKind ProcBind,
12896                                              SourceLocation Loc) {
12897   llvm_unreachable("Not supported in SIMD-only mode");
12898 }
12899 
12900 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12901                                                     const VarDecl *VD,
12902                                                     Address VDAddr,
12903                                                     SourceLocation Loc) {
12904   llvm_unreachable("Not supported in SIMD-only mode");
12905 }
12906 
12907 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12908     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12909     CodeGenFunction *CGF) {
12910   llvm_unreachable("Not supported in SIMD-only mode");
12911 }
12912 
12913 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12914     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12915   llvm_unreachable("Not supported in SIMD-only mode");
12916 }
12917 
12918 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12919                                     ArrayRef<const Expr *> Vars,
12920                                     SourceLocation Loc,
12921                                     llvm::AtomicOrdering AO) {
12922   llvm_unreachable("Not supported in SIMD-only mode");
12923 }
12924 
12925 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12926                                        const OMPExecutableDirective &D,
12927                                        llvm::Function *TaskFunction,
12928                                        QualType SharedsTy, Address Shareds,
12929                                        const Expr *IfCond,
12930                                        const OMPTaskDataTy &Data) {
12931   llvm_unreachable("Not supported in SIMD-only mode");
12932 }
12933 
12934 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12935     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12936     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12937     const Expr *IfCond, const OMPTaskDataTy &Data) {
12938   llvm_unreachable("Not supported in SIMD-only mode");
12939 }
12940 
12941 void CGOpenMPSIMDRuntime::emitReduction(
12942     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12943     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12944     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12945   assert(Options.SimpleReduction && "Only simple reduction is expected.");
12946   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12947                                  ReductionOps, Options);
12948 }
12949 
12950 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12951     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12952     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12953   llvm_unreachable("Not supported in SIMD-only mode");
12954 }
12955 
12956 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12957                                                 SourceLocation Loc,
12958                                                 bool IsWorksharingReduction) {
12959   llvm_unreachable("Not supported in SIMD-only mode");
12960 }
12961 
12962 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12963                                                   SourceLocation Loc,
12964                                                   ReductionCodeGen &RCG,
12965                                                   unsigned N) {
12966   llvm_unreachable("Not supported in SIMD-only mode");
12967 }
12968 
12969 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12970                                                   SourceLocation Loc,
12971                                                   llvm::Value *ReductionsPtr,
12972                                                   LValue SharedLVal) {
12973   llvm_unreachable("Not supported in SIMD-only mode");
12974 }
12975 
12976 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12977                                            SourceLocation Loc) {
12978   llvm_unreachable("Not supported in SIMD-only mode");
12979 }
12980 
12981 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12982     CodeGenFunction &CGF, SourceLocation Loc,
12983     OpenMPDirectiveKind CancelRegion) {
12984   llvm_unreachable("Not supported in SIMD-only mode");
12985 }
12986 
12987 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12988                                          SourceLocation Loc, const Expr *IfCond,
12989                                          OpenMPDirectiveKind CancelRegion) {
12990   llvm_unreachable("Not supported in SIMD-only mode");
12991 }
12992 
12993 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12994     const OMPExecutableDirective &D, StringRef ParentName,
12995     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12996     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12997   llvm_unreachable("Not supported in SIMD-only mode");
12998 }
12999 
13000 void CGOpenMPSIMDRuntime::emitTargetCall(
13001     CodeGenFunction &CGF, const OMPExecutableDirective &D,
13002     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
13003     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
13004     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
13005                                      const OMPLoopDirective &D)>
13006         SizeEmitter) {
13007   llvm_unreachable("Not supported in SIMD-only mode");
13008 }
13009 
13010 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
13011   llvm_unreachable("Not supported in SIMD-only mode");
13012 }
13013 
13014 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
13015   llvm_unreachable("Not supported in SIMD-only mode");
13016 }
13017 
13018 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
13019   return false;
13020 }
13021 
13022 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
13023                                         const OMPExecutableDirective &D,
13024                                         SourceLocation Loc,
13025                                         llvm::Function *OutlinedFn,
13026                                         ArrayRef<llvm::Value *> CapturedVars) {
13027   llvm_unreachable("Not supported in SIMD-only mode");
13028 }
13029 
13030 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
13031                                              const Expr *NumTeams,
13032                                              const Expr *ThreadLimit,
13033                                              SourceLocation Loc) {
13034   llvm_unreachable("Not supported in SIMD-only mode");
13035 }
13036 
13037 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
13038     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13039     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
13040   llvm_unreachable("Not supported in SIMD-only mode");
13041 }
13042 
13043 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
13044     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13045     const Expr *Device) {
13046   llvm_unreachable("Not supported in SIMD-only mode");
13047 }
13048 
13049 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
13050                                            const OMPLoopDirective &D,
13051                                            ArrayRef<Expr *> NumIterations) {
13052   llvm_unreachable("Not supported in SIMD-only mode");
13053 }
13054 
13055 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
13056                                               const OMPDependClause *C) {
13057   llvm_unreachable("Not supported in SIMD-only mode");
13058 }
13059 
13060 const VarDecl *
13061 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
13062                                         const VarDecl *NativeParam) const {
13063   llvm_unreachable("Not supported in SIMD-only mode");
13064 }
13065 
13066 Address
13067 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
13068                                          const VarDecl *NativeParam,
13069                                          const VarDecl *TargetParam) const {
13070   llvm_unreachable("Not supported in SIMD-only mode");
13071 }
13072