1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DerivedTypes.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Support/AtomicOrdering.h"
37 #include "llvm/Support/Format.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <cassert>
40 #include <numeric>
41 
42 using namespace clang;
43 using namespace CodeGen;
44 using namespace llvm::omp;
45 
46 namespace {
47 /// Base class for handling code generation inside OpenMP regions.
48 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
49 public:
50   /// Kinds of OpenMP regions used in codegen.
51   enum CGOpenMPRegionKind {
52     /// Region with outlined function for standalone 'parallel'
53     /// directive.
54     ParallelOutlinedRegion,
55     /// Region with outlined function for standalone 'task' directive.
56     TaskOutlinedRegion,
57     /// Region for constructs that do not require function outlining,
58     /// like 'for', 'sections', 'atomic' etc. directives.
59     InlinedRegion,
60     /// Region with outlined function for standalone 'target' directive.
61     TargetRegion,
62   };
63 
64   CGOpenMPRegionInfo(const CapturedStmt &CS,
65                      const CGOpenMPRegionKind RegionKind,
66                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
67                      bool HasCancel)
68       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
69         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
70 
71   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
72                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
73                      bool HasCancel)
74       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
75         Kind(Kind), HasCancel(HasCancel) {}
76 
77   /// Get a variable or parameter for storing global thread id
78   /// inside OpenMP construct.
79   virtual const VarDecl *getThreadIDVariable() const = 0;
80 
81   /// Emit the captured statement body.
82   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
83 
84   /// Get an LValue for the current ThreadID variable.
85   /// \return LValue for thread id variable. This LValue always has type int32*.
86   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
87 
88   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
89 
90   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
91 
92   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
93 
94   bool hasCancel() const { return HasCancel; }
95 
96   static bool classof(const CGCapturedStmtInfo *Info) {
97     return Info->getKind() == CR_OpenMP;
98   }
99 
100   ~CGOpenMPRegionInfo() override = default;
101 
102 protected:
103   CGOpenMPRegionKind RegionKind;
104   RegionCodeGenTy CodeGen;
105   OpenMPDirectiveKind Kind;
106   bool HasCancel;
107 };
108 
109 /// API for captured statement code generation in OpenMP constructs.
110 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
111 public:
112   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
113                              const RegionCodeGenTy &CodeGen,
114                              OpenMPDirectiveKind Kind, bool HasCancel,
115                              StringRef HelperName)
116       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
117                            HasCancel),
118         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
119     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
120   }
121 
122   /// Get a variable or parameter for storing global thread id
123   /// inside OpenMP construct.
124   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
125 
126   /// Get the name of the capture helper.
127   StringRef getHelperName() const override { return HelperName; }
128 
129   static bool classof(const CGCapturedStmtInfo *Info) {
130     return CGOpenMPRegionInfo::classof(Info) &&
131            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
132                ParallelOutlinedRegion;
133   }
134 
135 private:
136   /// A variable or parameter storing global thread id for OpenMP
137   /// constructs.
138   const VarDecl *ThreadIDVar;
139   StringRef HelperName;
140 };
141 
/// API for captured statement code generation in OpenMP constructs.
/// This variant describes regions of kind TaskOutlinedRegion and forwards
/// untied-task switch emission to an externally owned UntiedTaskActionTy.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the dispatch switch used by untied tasks.
  /// When constructed with Tied == true every emit method is a no-op.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True when the task is untied (negation of the constructor's Tied).
    bool Untied;
    /// Variable through which the part id is loaded and stored; it is
    /// treated as a pointer (see castAs<PointerType>() below).
    const VarDecl *PartIDVar;
    /// Codegen callback invoked at every switch point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch created in Enter(); stays null until Enter() runs for an untied
    /// task.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    /// For untied tasks: load the part id, create the dispatch switch,
    /// register the block that follows as case 0 and emit the first switch
    /// point.
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // The switch's default destination just leaves the function through
        // the pending cleanups.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Case 0 targets the block in which emission continues.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// For untied tasks: store the current number of switch cases as the part
    /// id, run UntiedCodeGen, branch to the return block, then open a new
    /// ".untied.jmp." block registered as the next switch case and continue
    /// emission in ".untied.next.".
    /// Dereferences UntiedSwitch, so Enter() must have run first.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The stored value equals the case index that is added below.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of cases currently registered on the switch.
    /// NOTE: dereferences UntiedSwitch, which is null unless Enter() has run
    /// for an untied task.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  /// \param ThreadIDVar Variable holding the global thread id; must be
  /// non-null.
  /// \param Action Untied-task action; stored by reference, so it must
  /// outlive this object.
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Forward to the untied-task action supplied at construction.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// LLVM-style RTTI: an OpenMP region info whose kind is TaskOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
230 
231 /// API for inlined captured statement code generation in OpenMP
232 /// constructs.
233 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
234 public:
235   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
236                             const RegionCodeGenTy &CodeGen,
237                             OpenMPDirectiveKind Kind, bool HasCancel)
238       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
239         OldCSI(OldCSI),
240         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
241 
242   // Retrieve the value of the context parameter.
243   llvm::Value *getContextValue() const override {
244     if (OuterRegionInfo)
245       return OuterRegionInfo->getContextValue();
246     llvm_unreachable("No context value for inlined OpenMP region");
247   }
248 
249   void setContextValue(llvm::Value *V) override {
250     if (OuterRegionInfo) {
251       OuterRegionInfo->setContextValue(V);
252       return;
253     }
254     llvm_unreachable("No context value for inlined OpenMP region");
255   }
256 
257   /// Lookup the captured field decl for a variable.
258   const FieldDecl *lookup(const VarDecl *VD) const override {
259     if (OuterRegionInfo)
260       return OuterRegionInfo->lookup(VD);
261     // If there is no outer outlined region,no need to lookup in a list of
262     // captured variables, we can use the original one.
263     return nullptr;
264   }
265 
266   FieldDecl *getThisFieldDecl() const override {
267     if (OuterRegionInfo)
268       return OuterRegionInfo->getThisFieldDecl();
269     return nullptr;
270   }
271 
272   /// Get a variable or parameter for storing global thread id
273   /// inside OpenMP construct.
274   const VarDecl *getThreadIDVariable() const override {
275     if (OuterRegionInfo)
276       return OuterRegionInfo->getThreadIDVariable();
277     return nullptr;
278   }
279 
280   /// Get an LValue for the current ThreadID variable.
281   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
282     if (OuterRegionInfo)
283       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
284     llvm_unreachable("No LValue for inlined OpenMP construct");
285   }
286 
287   /// Get the name of the capture helper.
288   StringRef getHelperName() const override {
289     if (auto *OuterRegionInfo = getOldCSI())
290       return OuterRegionInfo->getHelperName();
291     llvm_unreachable("No helper name for inlined OpenMP construct");
292   }
293 
294   void emitUntiedSwitch(CodeGenFunction &CGF) override {
295     if (OuterRegionInfo)
296       OuterRegionInfo->emitUntiedSwitch(CGF);
297   }
298 
299   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
300 
301   static bool classof(const CGCapturedStmtInfo *Info) {
302     return CGOpenMPRegionInfo::classof(Info) &&
303            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
304   }
305 
306   ~CGOpenMPInlinedRegionInfo() override = default;
307 
308 private:
309   /// CodeGen info about outer OpenMP region.
310   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
311   CGOpenMPRegionInfo *OuterRegionInfo;
312 };
313 
314 /// API for captured statement code generation in OpenMP target
315 /// constructs. For this captures, implicit parameters are used instead of the
316 /// captured fields. The name of the target region has to be unique in a given
317 /// application so it is provided by the client, because only the client has
318 /// the information to generate that.
319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
320 public:
321   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
322                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
323       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
324                            /*HasCancel=*/false),
325         HelperName(HelperName) {}
326 
327   /// This is unused for target regions because each starts executing
328   /// with a single thread.
329   const VarDecl *getThreadIDVariable() const override { return nullptr; }
330 
331   /// Get the name of the capture helper.
332   StringRef getHelperName() const override { return HelperName; }
333 
334   static bool classof(const CGCapturedStmtInfo *Info) {
335     return CGOpenMPRegionInfo::classof(Info) &&
336            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
337   }
338 
339 private:
340   StringRef HelperName;
341 };
342 
/// Placeholder region codegen callback for expression-only region info
/// (passed to the base class by CGOpenMPInnerExprInfo). It must never be
/// invoked.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
346 /// API for generation of expressions captured in a innermost OpenMP
347 /// region.
348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
349 public:
350   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
351       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
352                                   OMPD_unknown,
353                                   /*HasCancel=*/false),
354         PrivScope(CGF) {
355     // Make sure the globals captured in the provided statement are local by
356     // using the privatization logic. We assume the same variable is not
357     // captured more than once.
358     for (const auto &C : CS.captures()) {
359       if (!C.capturesVariable() && !C.capturesVariableByCopy())
360         continue;
361 
362       const VarDecl *VD = C.getCapturedVar();
363       if (VD->isLocalVarDeclOrParm())
364         continue;
365 
366       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
367                       /*RefersToEnclosingVariableOrCapture=*/false,
368                       VD->getType().getNonReferenceType(), VK_LValue,
369                       C.getLocation());
370       PrivScope.addPrivate(
371           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
372     }
373     (void)PrivScope.Privatize();
374   }
375 
376   /// Lookup the captured field decl for a variable.
377   const FieldDecl *lookup(const VarDecl *VD) const override {
378     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
379       return FD;
380     return nullptr;
381   }
382 
383   /// Emit the captured statement body.
384   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
385     llvm_unreachable("No body for expressions");
386   }
387 
388   /// Get a variable or parameter for storing global thread id
389   /// inside OpenMP construct.
390   const VarDecl *getThreadIDVariable() const override {
391     llvm_unreachable("No thread id for expressions");
392   }
393 
394   /// Get the name of the capture helper.
395   StringRef getHelperName() const override {
396     llvm_unreachable("No helper name for expressions");
397   }
398 
399   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
400 
401 private:
402   /// Private scope to capture global variables.
403   CodeGenFunction::OMPPrivateScope PrivScope;
404 };
405 
406 /// RAII for emitting code of OpenMP constructs.
407 class InlinedOpenMPRegionRAII {
408   CodeGenFunction &CGF;
409   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
410   FieldDecl *LambdaThisCaptureField = nullptr;
411   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
412 
413 public:
414   /// Constructs region for combined constructs.
415   /// \param CodeGen Code generation sequence for combined directives. Includes
416   /// a list of functions used for code generation of implicitly inlined
417   /// regions.
418   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
419                           OpenMPDirectiveKind Kind, bool HasCancel)
420       : CGF(CGF) {
421     // Start emission for the construct.
422     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
423         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
424     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
425     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
426     CGF.LambdaThisCaptureField = nullptr;
427     BlockInfo = CGF.BlockInfo;
428     CGF.BlockInfo = nullptr;
429   }
430 
431   ~InlinedOpenMPRegionRAII() {
432     // Restore original CapturedStmtInfo only if we're done with code emission.
433     auto *OldCSI =
434         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
435     delete CGF.CapturedStmtInfo;
436     CGF.CapturedStmtInfo = OldCSI;
437     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
438     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
439     CGF.BlockInfo = BlockInfo;
440   }
441 };
442 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// The enum is marked as an LLVM bitmask enum, with
/// OMP_IDENT_WORK_DISTRIBUTE as its largest value.
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive (OMP_IDENT_BARRIER_IMPL | 0x80).
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive (OMP_IDENT_BARRIER_IMPL | 0x100).
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
471 
namespace {
// Enable the bitmask operators for the bitmask enums declared in this
// namespace.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
/// Marked as an LLVM bitmask enum, with OMP_REQ_DYNAMIC_ALLOCATORS as its
/// largest value.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Device ID values with a reserved meaning.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
497 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
///
/// The enumerators below are listed in the same order as the members of the
/// structure quoted above, so each value is the index of the matching member.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
538 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
/// Each 'ordered' value (OMP_ord_*) equals the matching unordered value
/// (OMP_sch_*) plus 32.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule: alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// The modifiers are single high bits that do not overlap any of the
  /// schedule values above.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
570 
571 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
572 /// region.
573 class CleanupTy final : public EHScopeStack::Cleanup {
574   PrePostActionTy *Action;
575 
576 public:
577   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
578   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
579     if (!CGF.HaveInsertPoint())
580       return;
581     Action->Exit(CGF);
582   }
583 };
584 
585 } // anonymous namespace
586 
587 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
588   CodeGenFunction::RunCleanupsScope Scope(CGF);
589   if (PrePostAction) {
590     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
591     Callback(CodeGen, CGF, *PrePostAction);
592   } else {
593     PrePostActionTy Action;
594     Callback(CodeGen, CGF, Action);
595   }
596 }
597 
598 /// Check if the combiner is a call to UDR combiner and if it is so return the
599 /// UDR decl used for reduction.
600 static const OMPDeclareReductionDecl *
601 getReductionInit(const Expr *ReductionOp) {
602   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
603     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
604       if (const auto *DRE =
605               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
606         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
607           return DRD;
608   return nullptr;
609 }
610 
611 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
612                                              const OMPDeclareReductionDecl *DRD,
613                                              const Expr *InitOp,
614                                              Address Private, Address Original,
615                                              QualType Ty) {
616   if (DRD->getInitializer()) {
617     std::pair<llvm::Function *, llvm::Function *> Reduction =
618         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
619     const auto *CE = cast<CallExpr>(InitOp);
620     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
621     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
622     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
623     const auto *LHSDRE =
624         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
625     const auto *RHSDRE =
626         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
627     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
628     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
629                             [=]() { return Private; });
630     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
631                             [=]() { return Original; });
632     (void)PrivateScope.Privatize();
633     RValue Func = RValue::get(Reduction.second);
634     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
635     CGF.EmitIgnoredExpr(InitOp);
636   } else {
637     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
638     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
639     auto *GV = new llvm::GlobalVariable(
640         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
641         llvm::GlobalValue::PrivateLinkage, Init, Name);
642     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
643     RValue InitRVal;
644     switch (CGF.getEvaluationKind(Ty)) {
645     case TEK_Scalar:
646       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
647       break;
648     case TEK_Complex:
649       InitRVal =
650           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
651       break;
652     case TEK_Aggregate:
653       InitRVal = RValue::getAggregate(LV.getAddress(CGF));
654       break;
655     }
656     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
657     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
658     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
659                          /*IsInitializer=*/false);
660   }
661 }
662 
/// Emit initialization of arrays of complex types.
/// The array is initialized element by element in an emitted loop that is
/// skipped entirely when the array has no elements.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// emitInitWithReductionInitializer(); otherwise \p Init is evaluated into
/// each element.
/// \param Init Initial expression of array.
/// \param DRD User-defined reduction declaration, or null. The source array
/// is only walked when this is non-null.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  // The source array is viewed with the same element type as the destination.
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop altogether for an empty array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes track the current source (if any) and destination elements;
  // their first incoming values are the array starts from the entry block.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Cleanups created while initializing one element are run before the
    // loop advances.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the source-side value reuses the name
    // "omp.arraycpy.dest.element".
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back edge comes from whichever block the element initialization
  // ended in.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
751 
752 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
753   return CGF.EmitOMPSharedLValue(E);
754 }
755 
756 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
757                                             const Expr *E) {
758   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
759     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
760   return LValue();
761 }
762 
763 void ReductionCodeGen::emitAggregateInitialization(
764     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
765     const OMPDeclareReductionDecl *DRD) {
766   // Emit VarDecl with copy init for arrays.
767   // Get the address of the original variable captured in current
768   // captured region.
769   const auto *PrivateVD =
770       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
771   bool EmitDeclareReductionInit =
772       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
773   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
774                        EmitDeclareReductionInit,
775                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
776                                                 : PrivateVD->getInit(),
777                        DRD, SharedLVal.getAddress(CGF));
778 }
779 
780 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
781                                    ArrayRef<const Expr *> Origs,
782                                    ArrayRef<const Expr *> Privates,
783                                    ArrayRef<const Expr *> ReductionOps) {
784   ClausesData.reserve(Shareds.size());
785   SharedAddresses.reserve(Shareds.size());
786   Sizes.reserve(Shareds.size());
787   BaseDecls.reserve(Shareds.size());
788   const auto *IOrig = Origs.begin();
789   const auto *IPriv = Privates.begin();
790   const auto *IRed = ReductionOps.begin();
791   for (const Expr *Ref : Shareds) {
792     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
793     std::advance(IOrig, 1);
794     std::advance(IPriv, 1);
795     std::advance(IRed, 1);
796   }
797 }
798 
799 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
800   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
801          "Number of generated lvalues must be exactly N.");
802   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
803   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
804   SharedAddresses.emplace_back(First, Second);
805   if (ClausesData[N].Shared == ClausesData[N].Ref) {
806     OrigAddresses.emplace_back(First, Second);
807   } else {
808     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
809     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
810     OrigAddresses.emplace_back(First, Second);
811   }
812 }
813 
814 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
815   const auto *PrivateVD =
816       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
817   QualType PrivateType = PrivateVD->getType();
818   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
819   if (!PrivateType->isVariablyModifiedType()) {
820     Sizes.emplace_back(
821         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
822         nullptr);
823     return;
824   }
825   llvm::Value *Size;
826   llvm::Value *SizeInChars;
827   auto *ElemType =
828       cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
829           ->getElementType();
830   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
831   if (AsArraySection) {
832     Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
833                                      OrigAddresses[N].first.getPointer(CGF));
834     Size = CGF.Builder.CreateNUWAdd(
835         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
836     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
837   } else {
838     SizeInChars =
839         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
840     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
841   }
842   Sizes.emplace_back(SizeInChars, Size);
843   CodeGenFunction::OpaqueValueMapping OpaqueMap(
844       CGF,
845       cast<OpaqueValueExpr>(
846           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
847       RValue::get(Size));
848   CGF.EmitVariablyModifiedType(PrivateType);
849 }
850 
851 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
852                                          llvm::Value *Size) {
853   const auto *PrivateVD =
854       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
855   QualType PrivateType = PrivateVD->getType();
856   if (!PrivateType->isVariablyModifiedType()) {
857     assert(!Size && !Sizes[N].second &&
858            "Size should be nullptr for non-variably modified reduction "
859            "items.");
860     return;
861   }
862   CodeGenFunction::OpaqueValueMapping OpaqueMap(
863       CGF,
864       cast<OpaqueValueExpr>(
865           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
866       RValue::get(Size));
867   CGF.EmitVariablyModifiedType(PrivateType);
868 }
869 
870 void ReductionCodeGen::emitInitialization(
871     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
872     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
873   assert(SharedAddresses.size() > N && "No variable was generated");
874   const auto *PrivateVD =
875       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
876   const OMPDeclareReductionDecl *DRD =
877       getReductionInit(ClausesData[N].ReductionOp);
878   QualType PrivateType = PrivateVD->getType();
879   PrivateAddr = CGF.Builder.CreateElementBitCast(
880       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
881   QualType SharedType = SharedAddresses[N].first.getType();
882   SharedLVal = CGF.MakeAddrLValue(
883       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
884                                        CGF.ConvertTypeForMem(SharedType)),
885       SharedType, SharedAddresses[N].first.getBaseInfo(),
886       CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
887   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
888     if (DRD && DRD->getInitializer())
889       (void)DefaultInit(CGF);
890     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
891   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
892     (void)DefaultInit(CGF);
893     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
894                                      PrivateAddr, SharedLVal.getAddress(CGF),
895                                      SharedLVal.getType());
896   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
897              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
898     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
899                          PrivateVD->getType().getQualifiers(),
900                          /*IsInitializer=*/false);
901   }
902 }
903 
904 bool ReductionCodeGen::needCleanups(unsigned N) {
905   const auto *PrivateVD =
906       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
907   QualType PrivateType = PrivateVD->getType();
908   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
909   return DTorKind != QualType::DK_none;
910 }
911 
912 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
913                                     Address PrivateAddr) {
914   const auto *PrivateVD =
915       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
916   QualType PrivateType = PrivateVD->getType();
917   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
918   if (needCleanups(N)) {
919     PrivateAddr = CGF.Builder.CreateElementBitCast(
920         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
921     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
922   }
923 }
924 
925 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
926                           LValue BaseLV) {
927   BaseTy = BaseTy.getNonReferenceType();
928   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
929          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
930     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
931       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
932     } else {
933       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
934       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
935     }
936     BaseTy = BaseTy->getPointeeType();
937   }
938   return CGF.MakeAddrLValue(
939       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
940                                        CGF.ConvertTypeForMem(ElTy)),
941       BaseLV.getType(), BaseLV.getBaseInfo(),
942       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
943 }
944 
945 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
946                           llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
947                           llvm::Value *Addr) {
948   Address Tmp = Address::invalid();
949   Address TopTmp = Address::invalid();
950   Address MostTopTmp = Address::invalid();
951   BaseTy = BaseTy.getNonReferenceType();
952   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
953          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
954     Tmp = CGF.CreateMemTemp(BaseTy);
955     if (TopTmp.isValid())
956       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
957     else
958       MostTopTmp = Tmp;
959     TopTmp = Tmp;
960     BaseTy = BaseTy->getPointeeType();
961   }
962   llvm::Type *Ty = BaseLVType;
963   if (Tmp.isValid())
964     Ty = Tmp.getElementType();
965   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
966   if (Tmp.isValid()) {
967     CGF.Builder.CreateStore(Addr, Tmp);
968     return MostTopTmp;
969   }
970   return Address(Addr, BaseLVAlignment);
971 }
972 
973 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
974   const VarDecl *OrigVD = nullptr;
975   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
976     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
977     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
978       Base = TempOASE->getBase()->IgnoreParenImpCasts();
979     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
980       Base = TempASE->getBase()->IgnoreParenImpCasts();
981     DE = cast<DeclRefExpr>(Base);
982     OrigVD = cast<VarDecl>(DE->getDecl());
983   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
984     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
985     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
986       Base = TempASE->getBase()->IgnoreParenImpCasts();
987     DE = cast<DeclRefExpr>(Base);
988     OrigVD = cast<VarDecl>(DE->getDecl());
989   }
990   return OrigVD;
991 }
992 
993 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
994                                                Address PrivateAddr) {
995   const DeclRefExpr *DE;
996   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
997     BaseDecls.emplace_back(OrigVD);
998     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
999     LValue BaseLValue =
1000         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1001                     OriginalBaseLValue);
1002     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1003         BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1004     llvm::Value *PrivatePointer =
1005         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1006             PrivateAddr.getPointer(),
1007             SharedAddresses[N].first.getAddress(CGF).getType());
1008     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1009     return castToBase(CGF, OrigVD->getType(),
1010                       SharedAddresses[N].first.getType(),
1011                       OriginalBaseLValue.getAddress(CGF).getType(),
1012                       OriginalBaseLValue.getAlignment(), Ptr);
1013   }
1014   BaseDecls.emplace_back(
1015       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1016   return PrivateAddr;
1017 }
1018 
1019 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1020   const OMPDeclareReductionDecl *DRD =
1021       getReductionInit(ClausesData[N].ReductionOp);
1022   return DRD && DRD->getInitializer();
1023 }
1024 
1025 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1026   return CGF.EmitLoadOfPointerLValue(
1027       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1028       getThreadIDVariable()->getType()->castAs<PointerType>());
1029 }
1030 
1031 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1032   if (!CGF.HaveInsertPoint())
1033     return;
1034   // 1.2.2 OpenMP Language Terminology
1035   // Structured block - An executable statement with a single entry at the
1036   // top and a single exit at the bottom.
1037   // The point of exit cannot be a branch out of the structured block.
1038   // longjmp() and throw() must not violate the entry/exit criteria.
1039   CGF.EHStack.pushTerminate();
1040   CodeGen(CGF);
1041   CGF.EHStack.popTerminate();
1042 }
1043 
1044 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1045     CodeGenFunction &CGF) {
1046   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1047                             getThreadIDVariable()->getType(),
1048                             AlignmentSource::Decl);
1049 }
1050 
1051 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1052                                        QualType FieldTy) {
1053   auto *Field = FieldDecl::Create(
1054       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1055       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1056       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1057   Field->setAccess(AS_public);
1058   DC->addDecl(Field);
1059   return Field;
1060 }
1061 
1062 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1063                                  StringRef Separator)
1064     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1065       OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
1066   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1067 
1068   // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1069   OMPBuilder.initialize();
1070   loadOffloadInfoMetadata();
1071 }
1072 
1073 void CGOpenMPRuntime::clear() {
1074   InternalVars.clear();
1075   // Clean non-target variable declarations possibly used only in debug info.
1076   for (const auto &Data : EmittedNonTargetVariables) {
1077     if (!Data.getValue().pointsToAliveValue())
1078       continue;
1079     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1080     if (!GV)
1081       continue;
1082     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1083       continue;
1084     GV->eraseFromParent();
1085   }
1086 }
1087 
1088 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1089   SmallString<128> Buffer;
1090   llvm::raw_svector_ostream OS(Buffer);
1091   StringRef Sep = FirstSeparator;
1092   for (StringRef Part : Parts) {
1093     OS << Sep << Part;
1094     Sep = Separator;
1095   }
1096   return std::string(OS.str());
1097 }
1098 
1099 static llvm::Function *
1100 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1101                           const Expr *CombinerInitializer, const VarDecl *In,
1102                           const VarDecl *Out, bool IsCombiner) {
1103   // void .omp_combiner.(Ty *in, Ty *out);
1104   ASTContext &C = CGM.getContext();
1105   QualType PtrTy = C.getPointerType(Ty).withRestrict();
1106   FunctionArgList Args;
1107   ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1108                                /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1109   ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1110                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1111   Args.push_back(&OmpOutParm);
1112   Args.push_back(&OmpInParm);
1113   const CGFunctionInfo &FnInfo =
1114       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1115   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1116   std::string Name = CGM.getOpenMPRuntime().getName(
1117       {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1118   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1119                                     Name, &CGM.getModule());
1120   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1121   if (CGM.getLangOpts().Optimize) {
1122     Fn->removeFnAttr(llvm::Attribute::NoInline);
1123     Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1124     Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1125   }
1126   CodeGenFunction CGF(CGM);
1127   // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1128   // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1129   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1130                     Out->getLocation());
1131   CodeGenFunction::OMPPrivateScope Scope(CGF);
1132   Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1133   Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
1134     return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1135         .getAddress(CGF);
1136   });
1137   Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1138   Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
1139     return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1140         .getAddress(CGF);
1141   });
1142   (void)Scope.Privatize();
1143   if (!IsCombiner && Out->hasInit() &&
1144       !CGF.isTrivialInitializer(Out->getInit())) {
1145     CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1146                          Out->getType().getQualifiers(),
1147                          /*IsInitializer=*/true);
1148   }
1149   if (CombinerInitializer)
1150     CGF.EmitIgnoredExpr(CombinerInitializer);
1151   Scope.ForceCleanup();
1152   CGF.FinishFunction();
1153   return Fn;
1154 }
1155 
1156 void CGOpenMPRuntime::emitUserDefinedReduction(
1157     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1158   if (UDRMap.count(D) > 0)
1159     return;
1160   llvm::Function *Combiner = emitCombinerOrInitializer(
1161       CGM, D->getType(), D->getCombiner(),
1162       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1163       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1164       /*IsCombiner=*/true);
1165   llvm::Function *Initializer = nullptr;
1166   if (const Expr *Init = D->getInitializer()) {
1167     Initializer = emitCombinerOrInitializer(
1168         CGM, D->getType(),
1169         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1170                                                                      : nullptr,
1171         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1172         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1173         /*IsCombiner=*/false);
1174   }
1175   UDRMap.try_emplace(D, Combiner, Initializer);
1176   if (CGF) {
1177     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1178     Decls.second.push_back(D);
1179   }
1180 }
1181 
1182 std::pair<llvm::Function *, llvm::Function *>
1183 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1184   auto I = UDRMap.find(D);
1185   if (I != UDRMap.end())
1186     return I->second;
1187   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1188   return UDRMap.lookup(D);
1189 }
1190 
1191 namespace {
1192 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1193 // Builder if one is present.
1194 struct PushAndPopStackRAII {
1195   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1196                       bool HasCancel)
1197       : OMPBuilder(OMPBuilder) {
1198     if (!OMPBuilder)
1199       return;
1200 
1201     // The following callback is the crucial part of clangs cleanup process.
1202     //
1203     // NOTE:
1204     // Once the OpenMPIRBuilder is used to create parallel regions (and
1205     // similar), the cancellation destination (Dest below) is determined via
1206     // IP. That means if we have variables to finalize we split the block at IP,
1207     // use the new block (=BB) as destination to build a JumpDest (via
1208     // getJumpDestInCurrentScope(BB)) which then is fed to
1209     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1210     // to push & pop an FinalizationInfo object.
1211     // The FiniCB will still be needed but at the point where the
1212     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1213     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1214       assert(IP.getBlock()->end() == IP.getPoint() &&
1215              "Clang CG should cause non-terminated block!");
1216       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1217       CGF.Builder.restoreIP(IP);
1218       CodeGenFunction::JumpDest Dest =
1219           CGF.getOMPCancelDestination(OMPD_parallel);
1220       CGF.EmitBranchThroughCleanup(Dest);
1221     };
1222 
1223     // TODO: Remove this once we emit parallel regions through the
1224     //       OpenMPIRBuilder as it can do this setup internally.
1225     llvm::OpenMPIRBuilder::FinalizationInfo FI(
1226         {FiniCB, OMPD_parallel, HasCancel});
1227     OMPBuilder->pushFinalizationCB(std::move(FI));
1228   }
1229   ~PushAndPopStackRAII() {
1230     if (OMPBuilder)
1231       OMPBuilder->popFinalizationCB();
1232   }
1233   llvm::OpenMPIRBuilder *OMPBuilder;
1234 };
1235 } // namespace
1236 
1237 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1238     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1239     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1240     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1241   assert(ThreadIDVar->getType()->isPointerType() &&
1242          "thread id variable must be of type kmp_int32 *");
1243   CodeGenFunction CGF(CGM, true);
1244   bool HasCancel = false;
1245   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1246     HasCancel = OPD->hasCancel();
1247   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1248     HasCancel = OPD->hasCancel();
1249   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1250     HasCancel = OPSD->hasCancel();
1251   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1252     HasCancel = OPFD->hasCancel();
1253   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1254     HasCancel = OPFD->hasCancel();
1255   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1256     HasCancel = OPFD->hasCancel();
1257   else if (const auto *OPFD =
1258                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1259     HasCancel = OPFD->hasCancel();
1260   else if (const auto *OPFD =
1261                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1262     HasCancel = OPFD->hasCancel();
1263 
1264   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1265   //       parallel region to make cancellation barriers work properly.
1266   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1267   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
1268   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1269                                     HasCancel, OutlinedHelperName);
1270   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1271   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1272 }
1273 
1274 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1275     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1276     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1277   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1278   return emitParallelOrTeamsOutlinedFunction(
1279       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1280 }
1281 
1282 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1283     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1284     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1285   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1286   return emitParallelOrTeamsOutlinedFunction(
1287       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1288 }
1289 
1290 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1291     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1292     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1293     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1294     bool Tied, unsigned &NumberOfParts) {
1295   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1296                                               PrePostActionTy &) {
1297     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1298     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1299     llvm::Value *TaskArgs[] = {
1300         UpLoc, ThreadID,
1301         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1302                                     TaskTVar->getType()->castAs<PointerType>())
1303             .getPointer(CGF)};
1304     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1305                             CGM.getModule(), OMPRTL___kmpc_omp_task),
1306                         TaskArgs);
1307   };
1308   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1309                                                             UntiedCodeGen);
1310   CodeGen.setAction(Action);
1311   assert(!ThreadIDVar->getType()->isPointerType() &&
1312          "thread id variable must be of type kmp_int32 for tasks");
1313   const OpenMPDirectiveKind Region =
1314       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1315                                                       : OMPD_task;
1316   const CapturedStmt *CS = D.getCapturedStmt(Region);
1317   bool HasCancel = false;
1318   if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1319     HasCancel = TD->hasCancel();
1320   else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1321     HasCancel = TD->hasCancel();
1322   else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1323     HasCancel = TD->hasCancel();
1324   else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1325     HasCancel = TD->hasCancel();
1326 
1327   CodeGenFunction CGF(CGM, true);
1328   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1329                                         InnermostKind, HasCancel, Action);
1330   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1331   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1332   if (!Tied)
1333     NumberOfParts = Action.getNumberOfParts();
1334   return Res;
1335 }
1336 
1337 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1338                              const RecordDecl *RD, const CGRecordLayout &RL,
1339                              ArrayRef<llvm::Constant *> Data) {
1340   llvm::StructType *StructTy = RL.getLLVMType();
1341   unsigned PrevIdx = 0;
1342   ConstantInitBuilder CIBuilder(CGM);
1343   auto DI = Data.begin();
1344   for (const FieldDecl *FD : RD->fields()) {
1345     unsigned Idx = RL.getLLVMFieldNo(FD);
1346     // Fill the alignment.
1347     for (unsigned I = PrevIdx; I < Idx; ++I)
1348       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1349     PrevIdx = Idx + 1;
1350     Fields.add(*DI);
1351     ++DI;
1352   }
1353 }
1354 
1355 template <class... As>
1356 static llvm::GlobalVariable *
1357 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1358                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1359                    As &&... Args) {
1360   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1361   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1362   ConstantInitBuilder CIBuilder(CGM);
1363   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1364   buildStructValue(Fields, CGM, RD, RL, Data);
1365   return Fields.finishAndCreateGlobal(
1366       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1367       std::forward<As>(Args)...);
1368 }
1369 
1370 template <typename T>
1371 static void
1372 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1373                                          ArrayRef<llvm::Constant *> Data,
1374                                          T &Parent) {
1375   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1376   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1377   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1378   buildStructValue(Fields, CGM, RD, RL, Data);
1379   Fields.finishAndAddTo(Parent);
1380 }
1381 
1382 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1383                                              bool AtCurrentPoint) {
1384   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1385   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1386 
1387   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1388   if (AtCurrentPoint) {
1389     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1390         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1391   } else {
1392     Elem.second.ServiceInsertPt =
1393         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1394     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1395   }
1396 }
1397 
1398 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1399   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1400   if (Elem.second.ServiceInsertPt) {
1401     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1402     Elem.second.ServiceInsertPt = nullptr;
1403     Ptr->eraseFromParent();
1404   }
1405 }
1406 
1407 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1408                                                   SourceLocation Loc,
1409                                                   SmallString<128> &Buffer) {
1410   llvm::raw_svector_ostream OS(Buffer);
1411   // Build debug location
1412   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1413   OS << ";" << PLoc.getFilename() << ";";
1414   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1415     OS << FD->getQualifiedNameAsString();
1416   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1417   return OS.str();
1418 }
1419 
1420 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1421                                                  SourceLocation Loc,
1422                                                  unsigned Flags) {
1423   llvm::Constant *SrcLocStr;
1424   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1425       Loc.isInvalid()) {
1426     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1427   } else {
1428     std::string FunctionName = "";
1429     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1430       FunctionName = FD->getQualifiedNameAsString();
1431     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1432     const char *FileName = PLoc.getFilename();
1433     unsigned Line = PLoc.getLine();
1434     unsigned Column = PLoc.getColumn();
1435     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1436                                                 Line, Column);
1437   }
1438   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1439   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1440                                      Reserved2Flags);
1441 }
1442 
/// Returns the OpenMP global thread id to use at the current point of
/// CGF.CurFn.
///
/// Sources are tried in this order: the OpenMPIRBuilder (when enabled), a
/// value already cached for the function in OpenMPLocThreadIDMap, a load of
/// the region's thread id variable, and finally a call to
/// __kmpc_global_thread_num emitted at the function's service insertion point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the thread id variable when no landing pad is required,
      // (C++) exceptions are disabled, we are emitting into the entry block,
      // or the variable's pointer is either not an instruction or is defined
      // in the entry block or in the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // No usable thread id variable - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insertion point; the guard restores the
  // builder's previous insert point when this function returns.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1510 
1511 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1512   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1513   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1514     clearLocThreadIdInsertPt(CGF);
1515     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1516   }
1517   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1518     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1519       UDRMap.erase(D);
1520     FunctionUDRMap.erase(CGF.CurFn);
1521   }
1522   auto I = FunctionUDMMap.find(CGF.CurFn);
1523   if (I != FunctionUDMMap.end()) {
1524     for(const auto *D : I->second)
1525       UDMMap.erase(D);
1526     FunctionUDMMap.erase(I);
1527   }
1528   LastprivateConditionalToTypes.erase(CGF.CurFn);
1529   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1530 }
1531 
1532 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1533   return OMPBuilder.IdentPtr;
1534 }
1535 
1536 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1537   if (!Kmpc_MicroTy) {
1538     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1539     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1540                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1541     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1542   }
1543   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1544 }
1545 
1546 llvm::FunctionCallee
1547 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1548   assert((IVSize == 32 || IVSize == 64) &&
1549          "IV size is not compatible with the omp runtime");
1550   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1551                                             : "__kmpc_for_static_init_4u")
1552                                 : (IVSigned ? "__kmpc_for_static_init_8"
1553                                             : "__kmpc_for_static_init_8u");
1554   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1555   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1556   llvm::Type *TypeParams[] = {
1557     getIdentTyPointerTy(),                     // loc
1558     CGM.Int32Ty,                               // tid
1559     CGM.Int32Ty,                               // schedtype
1560     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1561     PtrTy,                                     // p_lower
1562     PtrTy,                                     // p_upper
1563     PtrTy,                                     // p_stride
1564     ITy,                                       // incr
1565     ITy                                        // chunk
1566   };
1567   auto *FnTy =
1568       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1569   return CGM.CreateRuntimeFunction(FnTy, Name);
1570 }
1571 
1572 llvm::FunctionCallee
1573 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1574   assert((IVSize == 32 || IVSize == 64) &&
1575          "IV size is not compatible with the omp runtime");
1576   StringRef Name =
1577       IVSize == 32
1578           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1579           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1580   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1581   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1582                                CGM.Int32Ty,           // tid
1583                                CGM.Int32Ty,           // schedtype
1584                                ITy,                   // lower
1585                                ITy,                   // upper
1586                                ITy,                   // stride
1587                                ITy                    // chunk
1588   };
1589   auto *FnTy =
1590       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1591   return CGM.CreateRuntimeFunction(FnTy, Name);
1592 }
1593 
1594 llvm::FunctionCallee
1595 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1596   assert((IVSize == 32 || IVSize == 64) &&
1597          "IV size is not compatible with the omp runtime");
1598   StringRef Name =
1599       IVSize == 32
1600           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1601           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1602   llvm::Type *TypeParams[] = {
1603       getIdentTyPointerTy(), // loc
1604       CGM.Int32Ty,           // tid
1605   };
1606   auto *FnTy =
1607       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1608   return CGM.CreateRuntimeFunction(FnTy, Name);
1609 }
1610 
1611 llvm::FunctionCallee
1612 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1613   assert((IVSize == 32 || IVSize == 64) &&
1614          "IV size is not compatible with the omp runtime");
1615   StringRef Name =
1616       IVSize == 32
1617           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1618           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1619   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1620   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1621   llvm::Type *TypeParams[] = {
1622     getIdentTyPointerTy(),                     // loc
1623     CGM.Int32Ty,                               // tid
1624     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1625     PtrTy,                                     // p_lower
1626     PtrTy,                                     // p_upper
1627     PtrTy                                      // p_stride
1628   };
1629   auto *FnTy =
1630       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1631   return CGM.CreateRuntimeFunction(FnTy, Name);
1632 }
1633 
1634 /// Obtain information that uniquely identifies a target entry. This
1635 /// consists of the file and device IDs as well as line number associated with
1636 /// the relevant entry source location.
1637 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1638                                      unsigned &DeviceID, unsigned &FileID,
1639                                      unsigned &LineNum) {
1640   SourceManager &SM = C.getSourceManager();
1641 
1642   // The loc should be always valid and have a file ID (the user cannot use
1643   // #pragma directives in macros)
1644 
1645   assert(Loc.isValid() && "Source location is expected to be always valid.");
1646 
1647   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1648   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1649 
1650   llvm::sys::fs::UniqueID ID;
1651   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1652     SM.getDiagnostics().Report(diag::err_cannot_open_file)
1653         << PLoc.getFilename() << EC.message();
1654 
1655   DeviceID = ID.getDevice();
1656   FileID = ID.getFile();
1657   LineNum = PLoc.getLine();
1658 }
1659 
1660 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1661   if (CGM.getLangOpts().OpenMPSimd)
1662     return Address::invalid();
1663   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1664       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1665   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1666               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1667                HasRequiresUnifiedSharedMemory))) {
1668     SmallString<64> PtrName;
1669     {
1670       llvm::raw_svector_ostream OS(PtrName);
1671       OS << CGM.getMangledName(GlobalDecl(VD));
1672       if (!VD->isExternallyVisible()) {
1673         unsigned DeviceID, FileID, Line;
1674         getTargetEntryUniqueInfo(CGM.getContext(),
1675                                  VD->getCanonicalDecl()->getBeginLoc(),
1676                                  DeviceID, FileID, Line);
1677         OS << llvm::format("_%x", FileID);
1678       }
1679       OS << "_decl_tgt_ref_ptr";
1680     }
1681     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1682     if (!Ptr) {
1683       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1684       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1685                                         PtrName);
1686 
1687       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1688       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1689 
1690       if (!CGM.getLangOpts().OpenMPIsDevice)
1691         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1692       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1693     }
1694     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1695   }
1696   return Address::invalid();
1697 }
1698 
1699 llvm::Constant *
1700 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1701   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1702          !CGM.getContext().getTargetInfo().isTLSSupported());
1703   // Lookup the entry, lazily creating it if necessary.
1704   std::string Suffix = getName({"cache", ""});
1705   return getOrCreateInternalVariable(
1706       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1707 }
1708 
1709 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1710                                                 const VarDecl *VD,
1711                                                 Address VDAddr,
1712                                                 SourceLocation Loc) {
1713   if (CGM.getLangOpts().OpenMPUseTLS &&
1714       CGM.getContext().getTargetInfo().isTLSSupported())
1715     return VDAddr;
1716 
1717   llvm::Type *VarTy = VDAddr.getElementType();
1718   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1719                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1720                                                        CGM.Int8PtrTy),
1721                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1722                          getOrCreateThreadPrivateCache(VD)};
1723   return Address(CGF.EmitRuntimeCall(
1724                      OMPBuilder.getOrCreateRuntimeFunction(
1725                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1726                      Args),
1727                  VDAddr.getAlignment());
1728 }
1729 
1730 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1731     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1732     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1733   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1734   // library.
1735   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1736   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1737                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1738                       OMPLoc);
1739   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1740   // to register constructor/destructor for variable.
1741   llvm::Value *Args[] = {
1742       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1743       Ctor, CopyCtor, Dtor};
1744   CGF.EmitRuntimeCall(
1745       OMPBuilder.getOrCreateRuntimeFunction(
1746           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1747       Args);
1748 }
1749 
/// Emits the helper functions needed to register the threadprivate variable
/// \p VD with the OpenMP runtime.
///
/// Nothing is emitted when TLS is used for threadprivate variables, when
/// \p VD has no definition, or when its mangled name has already been
/// processed. Otherwise a ctor helper (C++ with \p PerformInit only) and a
/// dtor helper (destructible types only) are generated; if neither is needed
/// the function returns nullptr without registering anything.
///
/// \param VD The threadprivate variable.
/// \param VDAddr Address of the original variable.
/// \param Loc Location used for the generated functions and runtime calls.
/// \param PerformInit Whether the initializer of \p VD must be re-emitted for
/// each threadprivate copy.
/// \param CGF If non-null, the registration calls are emitted into this
/// function and nullptr is returned. If null, a standalone initialization
/// function containing the registration is created and returned.
/// \returns The newly created initialization function, or nullptr.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With TLS the runtime registration is not needed.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Process each variable at most once, keyed by its mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      // NOTE(review): Init is dereferenced below without a null check;
      // presumably PerformInit is only set when VD has an initializer -
      // confirm against callers.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer argument and view it as a pointer to the
      // variable's type.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The helper returns the destination pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // A helper that was not generated is passed as a typed null pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function to emit into: wrap the registration in its own
      // initialization function and hand that back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the caller's function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1869 
/// Emits and registers the ctor/dtor offload entries for the declare target
/// variable \p VD stored at \p Addr.
///
/// When compiling for the device, real ctor/dtor functions operating on
/// \p Addr are generated and marked as used; on the host, private constant i8
/// globals stand in for them. Either way each entry is recorded in
/// OffloadEntriesInfoManager. Variables that are not declare target, are
/// 'link', or are 'to' under unified shared memory get no entries, and each
/// variable is processed at most once (keyed by mangled name).
///
/// \param VD The declare target variable.
/// \param Addr Global variable holding \p VD.
/// \param PerformInit Whether a ctor entry is required (C++ only).
/// \returns true when compiling for the device
/// (CGM.getLangOpts().OpenMPIsDevice), false otherwise.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do without offload targets unless this is a device compilation.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Skip variables whose entries were already emitted.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    // Resulting prefix: "__omp_offloading__<device>_<file>_<name>_l<line>",
    // with the IDs printed in hexadecimal.
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the declare target variable at Addr.
      // NOTE(review): Init is dereferenced below without a null check;
      // presumably PerformInit is only set when VD has an initializer -
      // confirm against callers.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: a private constant i8 global serves as both the entry
      // address and its ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the declare target
      // variable at Addr.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: a private constant i8 global serves as both the entry
      // address and its ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
1984 
1985 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1986                                                           QualType VarType,
1987                                                           StringRef Name) {
1988   std::string Suffix = getName({"artificial", ""});
1989   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1990   llvm::Value *GAddr =
1991       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
1992   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1993       CGM.getTarget().isTLSSupported()) {
1994     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
1995     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
1996   }
1997   std::string CacheSuffix = getName({"cache", ""});
1998   llvm::Value *Args[] = {
1999       emitUpdateLocation(CGF, SourceLocation()),
2000       getThreadID(CGF, SourceLocation()),
2001       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2002       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2003                                 /*isSigned=*/false),
2004       getOrCreateInternalVariable(
2005           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2006   return Address(
2007       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2008           CGF.EmitRuntimeCall(
2009               OMPBuilder.getOrCreateRuntimeFunction(
2010                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2011               Args),
2012           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2013       CGM.getContext().getTypeAlignInChars(VarType));
2014 }
2015 
2016 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2017                                    const RegionCodeGenTy &ThenGen,
2018                                    const RegionCodeGenTy &ElseGen) {
2019   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2020 
2021   // If the condition constant folds and can be elided, try to avoid emitting
2022   // the condition and the dead arm of the if/else.
2023   bool CondConstant;
2024   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2025     if (CondConstant)
2026       ThenGen(CGF);
2027     else
2028       ElseGen(CGF);
2029     return;
2030   }
2031 
2032   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2033   // emit the conditional branch.
2034   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2035   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2036   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2037   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2038 
2039   // Emit the 'then' code.
2040   CGF.EmitBlock(ThenBlock);
2041   ThenGen(CGF);
2042   CGF.EmitBranch(ContBlock);
2043   // Emit the 'else' code if present.
2044   // There is no need to emit line number for unconditional branch.
2045   (void)ApplyDebugLocation::CreateEmpty(CGF);
2046   CGF.EmitBlock(ElseBlock);
2047   ElseGen(CGF);
2048   // There is no need to emit line number for unconditional branch.
2049   (void)ApplyDebugLocation::CreateEmpty(CGF);
2050   CGF.EmitBranch(ContBlock);
2051   // Emit the continuation block for code after the if.
2052   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2053 }
2054 
2055 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2056                                        llvm::Function *OutlinedFn,
2057                                        ArrayRef<llvm::Value *> CapturedVars,
2058                                        const Expr *IfCond) {
2059   if (!CGF.HaveInsertPoint())
2060     return;
2061   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2062   auto &M = CGM.getModule();
2063   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2064                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2065     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2066     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2067     llvm::Value *Args[] = {
2068         RTLoc,
2069         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2070         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2071     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2072     RealArgs.append(std::begin(Args), std::end(Args));
2073     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2074 
2075     llvm::FunctionCallee RTLFn =
2076         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2077     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2078   };
2079   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2080                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2081     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2082     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2083     // Build calls:
2084     // __kmpc_serialized_parallel(&Loc, GTid);
2085     llvm::Value *Args[] = {RTLoc, ThreadID};
2086     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2087                             M, OMPRTL___kmpc_serialized_parallel),
2088                         Args);
2089 
2090     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2091     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2092     Address ZeroAddrBound =
2093         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2094                                          /*Name=*/".bound.zero.addr");
2095     CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
2096     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2097     // ThreadId for serialized parallels is 0.
2098     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2099     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2100     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2101     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2102 
2103     // __kmpc_end_serialized_parallel(&Loc, GTid);
2104     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2105     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2106                             M, OMPRTL___kmpc_end_serialized_parallel),
2107                         EndArgs);
2108   };
2109   if (IfCond) {
2110     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2111   } else {
2112     RegionCodeGenTy ThenRCG(ThenGen);
2113     ThenRCG(CGF);
2114   }
2115 }
2116 
2117 // If we're inside an (outlined) parallel region, use the region info's
2118 // thread-ID variable (it is passed in a first argument of the outlined function
2119 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2120 // regular serial code region, get thread ID by calling kmp_int32
2121 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2122 // return the address of that temp.
2123 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2124                                              SourceLocation Loc) {
2125   if (auto *OMPRegionInfo =
2126           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2127     if (OMPRegionInfo->getThreadIDVariable())
2128       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2129 
2130   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2131   QualType Int32Ty =
2132       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2133   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2134   CGF.EmitStoreOfScalar(ThreadID,
2135                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2136 
2137   return ThreadIDTemp;
2138 }
2139 
2140 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2141     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2142   SmallString<256> Buffer;
2143   llvm::raw_svector_ostream Out(Buffer);
2144   Out << Name;
2145   StringRef RuntimeName = Out.str();
2146   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2147   if (Elem.second) {
2148     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2149            "OMP internal variable has different type than requested");
2150     return &*Elem.second;
2151   }
2152 
2153   return Elem.second = new llvm::GlobalVariable(
2154              CGM.getModule(), Ty, /*IsConstant*/ false,
2155              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2156              Elem.first(), /*InsertBefore=*/nullptr,
2157              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2158 }
2159 
2160 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2161   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2162   std::string Name = getName({Prefix, "var"});
2163   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2164 }
2165 
2166 namespace {
2167 /// Common pre(post)-action for different OpenMP constructs.
2168 class CommonActionTy final : public PrePostActionTy {
2169   llvm::FunctionCallee EnterCallee;
2170   ArrayRef<llvm::Value *> EnterArgs;
2171   llvm::FunctionCallee ExitCallee;
2172   ArrayRef<llvm::Value *> ExitArgs;
2173   bool Conditional;
2174   llvm::BasicBlock *ContBlock = nullptr;
2175 
2176 public:
2177   CommonActionTy(llvm::FunctionCallee EnterCallee,
2178                  ArrayRef<llvm::Value *> EnterArgs,
2179                  llvm::FunctionCallee ExitCallee,
2180                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2181       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2182         ExitArgs(ExitArgs), Conditional(Conditional) {}
2183   void Enter(CodeGenFunction &CGF) override {
2184     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2185     if (Conditional) {
2186       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2187       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2188       ContBlock = CGF.createBasicBlock("omp_if.end");
2189       // Generate the branch (If-stmt)
2190       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2191       CGF.EmitBlock(ThenBlock);
2192     }
2193   }
2194   void Done(CodeGenFunction &CGF) {
2195     // Emit the rest of blocks/branches
2196     CGF.EmitBranch(ContBlock);
2197     CGF.EmitBlock(ContBlock, true);
2198   }
2199   void Exit(CodeGenFunction &CGF) override {
2200     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2201   }
2202 };
2203 } // anonymous namespace
2204 
2205 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2206                                          StringRef CriticalName,
2207                                          const RegionCodeGenTy &CriticalOpGen,
2208                                          SourceLocation Loc, const Expr *Hint) {
2209   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2210   // CriticalOpGen();
2211   // __kmpc_end_critical(ident_t *, gtid, Lock);
2212   // Prepare arguments and build a call to __kmpc_critical
2213   if (!CGF.HaveInsertPoint())
2214     return;
2215   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2216                          getCriticalRegionLock(CriticalName)};
2217   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2218                                                 std::end(Args));
2219   if (Hint) {
2220     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2221         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2222   }
2223   CommonActionTy Action(
2224       OMPBuilder.getOrCreateRuntimeFunction(
2225           CGM.getModule(),
2226           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2227       EnterArgs,
2228       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2229                                             OMPRTL___kmpc_end_critical),
2230       Args);
2231   CriticalOpGen.setAction(Action);
2232   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2233 }
2234 
2235 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2236                                        const RegionCodeGenTy &MasterOpGen,
2237                                        SourceLocation Loc) {
2238   if (!CGF.HaveInsertPoint())
2239     return;
2240   // if(__kmpc_master(ident_t *, gtid)) {
2241   //   MasterOpGen();
2242   //   __kmpc_end_master(ident_t *, gtid);
2243   // }
2244   // Prepare arguments and build a call to __kmpc_master
2245   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2246   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2247                             CGM.getModule(), OMPRTL___kmpc_master),
2248                         Args,
2249                         OMPBuilder.getOrCreateRuntimeFunction(
2250                             CGM.getModule(), OMPRTL___kmpc_end_master),
2251                         Args,
2252                         /*Conditional=*/true);
2253   MasterOpGen.setAction(Action);
2254   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2255   Action.Done(CGF);
2256 }
2257 
2258 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2259                                         SourceLocation Loc) {
2260   if (!CGF.HaveInsertPoint())
2261     return;
2262   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2263     OMPBuilder.CreateTaskyield(CGF.Builder);
2264   } else {
2265     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2266     llvm::Value *Args[] = {
2267         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2268         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2269     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2270                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2271                         Args);
2272   }
2273 
2274   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2275     Region->emitUntiedSwitch(CGF);
2276 }
2277 
2278 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2279                                           const RegionCodeGenTy &TaskgroupOpGen,
2280                                           SourceLocation Loc) {
2281   if (!CGF.HaveInsertPoint())
2282     return;
2283   // __kmpc_taskgroup(ident_t *, gtid);
2284   // TaskgroupOpGen();
2285   // __kmpc_end_taskgroup(ident_t *, gtid);
2286   // Prepare arguments and build a call to __kmpc_taskgroup
2287   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2288   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2289                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2290                         Args,
2291                         OMPBuilder.getOrCreateRuntimeFunction(
2292                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2293                         Args);
2294   TaskgroupOpGen.setAction(Action);
2295   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2296 }
2297 
2298 /// Given an array of pointers to variables, project the address of a
2299 /// given variable.
2300 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2301                                       unsigned Index, const VarDecl *Var) {
2302   // Pull out the pointer to the variable.
2303   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2304   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2305 
2306   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2307   Addr = CGF.Builder.CreateElementBitCast(
2308       Addr, CGF.ConvertTypeForMem(Var->getType()));
2309   return Addr;
2310 }
2311 
2312 static llvm::Value *emitCopyprivateCopyFunction(
2313     CodeGenModule &CGM, llvm::Type *ArgsType,
2314     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2315     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2316     SourceLocation Loc) {
2317   ASTContext &C = CGM.getContext();
2318   // void copy_func(void *LHSArg, void *RHSArg);
2319   FunctionArgList Args;
2320   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2321                            ImplicitParamDecl::Other);
2322   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2323                            ImplicitParamDecl::Other);
2324   Args.push_back(&LHSArg);
2325   Args.push_back(&RHSArg);
2326   const auto &CGFI =
2327       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2328   std::string Name =
2329       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2330   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2331                                     llvm::GlobalValue::InternalLinkage, Name,
2332                                     &CGM.getModule());
2333   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2334   Fn->setDoesNotRecurse();
2335   CodeGenFunction CGF(CGM);
2336   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2337   // Dest = (void*[n])(LHSArg);
2338   // Src = (void*[n])(RHSArg);
2339   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2340       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2341       ArgsType), CGF.getPointerAlign());
2342   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2343       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2344       ArgsType), CGF.getPointerAlign());
2345   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2346   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2347   // ...
2348   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2349   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2350     const auto *DestVar =
2351         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2352     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2353 
2354     const auto *SrcVar =
2355         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2356     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2357 
2358     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2359     QualType Type = VD->getType();
2360     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2361   }
2362   CGF.FinishFunction();
2363   return Fn;
2364 }
2365 
2366 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2367                                        const RegionCodeGenTy &SingleOpGen,
2368                                        SourceLocation Loc,
2369                                        ArrayRef<const Expr *> CopyprivateVars,
2370                                        ArrayRef<const Expr *> SrcExprs,
2371                                        ArrayRef<const Expr *> DstExprs,
2372                                        ArrayRef<const Expr *> AssignmentOps) {
2373   if (!CGF.HaveInsertPoint())
2374     return;
2375   assert(CopyprivateVars.size() == SrcExprs.size() &&
2376          CopyprivateVars.size() == DstExprs.size() &&
2377          CopyprivateVars.size() == AssignmentOps.size());
2378   ASTContext &C = CGM.getContext();
2379   // int32 did_it = 0;
2380   // if(__kmpc_single(ident_t *, gtid)) {
2381   //   SingleOpGen();
2382   //   __kmpc_end_single(ident_t *, gtid);
2383   //   did_it = 1;
2384   // }
2385   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2386   // <copy_func>, did_it);
2387 
2388   Address DidIt = Address::invalid();
2389   if (!CopyprivateVars.empty()) {
2390     // int32 did_it = 0;
2391     QualType KmpInt32Ty =
2392         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2393     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2394     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2395   }
2396   // Prepare arguments and build a call to __kmpc_single
2397   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2398   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2399                             CGM.getModule(), OMPRTL___kmpc_single),
2400                         Args,
2401                         OMPBuilder.getOrCreateRuntimeFunction(
2402                             CGM.getModule(), OMPRTL___kmpc_end_single),
2403                         Args,
2404                         /*Conditional=*/true);
2405   SingleOpGen.setAction(Action);
2406   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2407   if (DidIt.isValid()) {
2408     // did_it = 1;
2409     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2410   }
2411   Action.Done(CGF);
2412   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2413   // <copy_func>, did_it);
2414   if (DidIt.isValid()) {
2415     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2416     QualType CopyprivateArrayTy = C.getConstantArrayType(
2417         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2418         /*IndexTypeQuals=*/0);
2419     // Create a list of all private variables for copyprivate.
2420     Address CopyprivateList =
2421         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2422     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2423       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2424       CGF.Builder.CreateStore(
2425           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2426               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2427               CGF.VoidPtrTy),
2428           Elem);
2429     }
2430     // Build function that copies private values from single region to all other
2431     // threads in the corresponding parallel region.
2432     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2433         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2434         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2435     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2436     Address CL =
2437       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2438                                                       CGF.VoidPtrTy);
2439     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2440     llvm::Value *Args[] = {
2441         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2442         getThreadID(CGF, Loc),        // i32 <gtid>
2443         BufSize,                      // size_t <buf_size>
2444         CL.getPointer(),              // void *<copyprivate list>
2445         CpyFn,                        // void (*) (void *, void *) <copy_func>
2446         DidItVal                      // i32 did_it
2447     };
2448     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2449                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2450                         Args);
2451   }
2452 }
2453 
2454 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2455                                         const RegionCodeGenTy &OrderedOpGen,
2456                                         SourceLocation Loc, bool IsThreads) {
2457   if (!CGF.HaveInsertPoint())
2458     return;
2459   // __kmpc_ordered(ident_t *, gtid);
2460   // OrderedOpGen();
2461   // __kmpc_end_ordered(ident_t *, gtid);
2462   // Prepare arguments and build a call to __kmpc_ordered
2463   if (IsThreads) {
2464     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2465     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2466                               CGM.getModule(), OMPRTL___kmpc_ordered),
2467                           Args,
2468                           OMPBuilder.getOrCreateRuntimeFunction(
2469                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2470                           Args);
2471     OrderedOpGen.setAction(Action);
2472     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2473     return;
2474   }
2475   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2476 }
2477 
2478 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2479   unsigned Flags;
2480   if (Kind == OMPD_for)
2481     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2482   else if (Kind == OMPD_sections)
2483     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2484   else if (Kind == OMPD_single)
2485     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2486   else if (Kind == OMPD_barrier)
2487     Flags = OMP_IDENT_BARRIER_EXPL;
2488   else
2489     Flags = OMP_IDENT_BARRIER_IMPL;
2490   return Flags;
2491 }
2492 
2493 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2494     CodeGenFunction &CGF, const OMPLoopDirective &S,
2495     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2496   // Check if the loop directive is actually a doacross loop directive. In this
2497   // case choose static, 1 schedule.
2498   if (llvm::any_of(
2499           S.getClausesOfKind<OMPOrderedClause>(),
2500           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2501     ScheduleKind = OMPC_SCHEDULE_static;
2502     // Chunk size is 1 in this case.
2503     llvm::APInt ChunkSize(32, 1);
2504     ChunkExpr = IntegerLiteral::Create(
2505         CGF.getContext(), ChunkSize,
2506         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2507         SourceLocation());
2508   }
2509 }
2510 
2511 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2512                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2513                                       bool ForceSimpleCall) {
2514   // Check if we should use the OMPBuilder
2515   auto *OMPRegionInfo =
2516       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2517   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2518     CGF.Builder.restoreIP(OMPBuilder.CreateBarrier(
2519         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2520     return;
2521   }
2522 
2523   if (!CGF.HaveInsertPoint())
2524     return;
2525   // Build call __kmpc_cancel_barrier(loc, thread_id);
2526   // Build call __kmpc_barrier(loc, thread_id);
2527   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2528   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2529   // thread_id);
2530   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2531                          getThreadID(CGF, Loc)};
2532   if (OMPRegionInfo) {
2533     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2534       llvm::Value *Result = CGF.EmitRuntimeCall(
2535           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2536                                                 OMPRTL___kmpc_cancel_barrier),
2537           Args);
2538       if (EmitChecks) {
2539         // if (__kmpc_cancel_barrier()) {
2540         //   exit from construct;
2541         // }
2542         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2543         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2544         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2545         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2546         CGF.EmitBlock(ExitBB);
2547         //   exit from construct;
2548         CodeGenFunction::JumpDest CancelDestination =
2549             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2550         CGF.EmitBranchThroughCleanup(CancelDestination);
2551         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2552       }
2553       return;
2554     }
2555   }
2556   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2557                           CGM.getModule(), OMPRTL___kmpc_barrier),
2558                       Args);
2559 }
2560 
2561 /// Map the OpenMP loop schedule to the runtime enumeration.
2562 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2563                                           bool Chunked, bool Ordered) {
2564   switch (ScheduleKind) {
2565   case OMPC_SCHEDULE_static:
2566     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2567                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2568   case OMPC_SCHEDULE_dynamic:
2569     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2570   case OMPC_SCHEDULE_guided:
2571     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2572   case OMPC_SCHEDULE_runtime:
2573     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2574   case OMPC_SCHEDULE_auto:
2575     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2576   case OMPC_SCHEDULE_unknown:
2577     assert(!Chunked && "chunk was specified but schedule kind not known");
2578     return Ordered ? OMP_ord_static : OMP_sch_static;
2579   }
2580   llvm_unreachable("Unexpected runtime schedule");
2581 }
2582 
2583 /// Map the OpenMP distribute schedule to the runtime enumeration.
2584 static OpenMPSchedType
2585 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2586   // only static is allowed for dist_schedule
2587   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2588 }
2589 
2590 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2591                                          bool Chunked) const {
2592   OpenMPSchedType Schedule =
2593       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2594   return Schedule == OMP_sch_static;
2595 }
2596 
2597 bool CGOpenMPRuntime::isStaticNonchunked(
2598     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2599   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2600   return Schedule == OMP_dist_sch_static;
2601 }
2602 
2603 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2604                                       bool Chunked) const {
2605   OpenMPSchedType Schedule =
2606       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2607   return Schedule == OMP_sch_static_chunked;
2608 }
2609 
2610 bool CGOpenMPRuntime::isStaticChunked(
2611     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2612   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2613   return Schedule == OMP_dist_sch_static_chunked;
2614 }
2615 
2616 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2617   OpenMPSchedType Schedule =
2618       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2619   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2620   return Schedule != OMP_sch_static;
2621 }
2622 
2623 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2624                                   OpenMPScheduleClauseModifier M1,
2625                                   OpenMPScheduleClauseModifier M2) {
2626   int Modifier = 0;
2627   switch (M1) {
2628   case OMPC_SCHEDULE_MODIFIER_monotonic:
2629     Modifier = OMP_sch_modifier_monotonic;
2630     break;
2631   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2632     Modifier = OMP_sch_modifier_nonmonotonic;
2633     break;
2634   case OMPC_SCHEDULE_MODIFIER_simd:
2635     if (Schedule == OMP_sch_static_chunked)
2636       Schedule = OMP_sch_static_balanced_chunked;
2637     break;
2638   case OMPC_SCHEDULE_MODIFIER_last:
2639   case OMPC_SCHEDULE_MODIFIER_unknown:
2640     break;
2641   }
2642   switch (M2) {
2643   case OMPC_SCHEDULE_MODIFIER_monotonic:
2644     Modifier = OMP_sch_modifier_monotonic;
2645     break;
2646   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2647     Modifier = OMP_sch_modifier_nonmonotonic;
2648     break;
2649   case OMPC_SCHEDULE_MODIFIER_simd:
2650     if (Schedule == OMP_sch_static_chunked)
2651       Schedule = OMP_sch_static_balanced_chunked;
2652     break;
2653   case OMPC_SCHEDULE_MODIFIER_last:
2654   case OMPC_SCHEDULE_MODIFIER_unknown:
2655     break;
2656   }
2657   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2658   // If the static schedule kind is specified or if the ordered clause is
2659   // specified, and if the nonmonotonic modifier is not specified, the effect is
2660   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2661   // modifier is specified, the effect is as if the nonmonotonic modifier is
2662   // specified.
2663   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2664     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2665           Schedule == OMP_sch_static_balanced_chunked ||
2666           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2667           Schedule == OMP_dist_sch_static_chunked ||
2668           Schedule == OMP_dist_sch_static))
2669       Modifier = OMP_sch_modifier_nonmonotonic;
2670   }
2671   return Schedule | Modifier;
2672 }
2673 
2674 void CGOpenMPRuntime::emitForDispatchInit(
2675     CodeGenFunction &CGF, SourceLocation Loc,
2676     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2677     bool Ordered, const DispatchRTInput &DispatchValues) {
2678   if (!CGF.HaveInsertPoint())
2679     return;
2680   OpenMPSchedType Schedule = getRuntimeSchedule(
2681       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2682   assert(Ordered ||
2683          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2684           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2685           Schedule != OMP_sch_static_balanced_chunked));
2686   // Call __kmpc_dispatch_init(
2687   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2688   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2689   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2690 
2691   // If the Chunk was not specified in the clause - use default value 1.
2692   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2693                                             : CGF.Builder.getIntN(IVSize, 1);
2694   llvm::Value *Args[] = {
2695       emitUpdateLocation(CGF, Loc),
2696       getThreadID(CGF, Loc),
2697       CGF.Builder.getInt32(addMonoNonMonoModifier(
2698           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2699       DispatchValues.LB,                                     // Lower
2700       DispatchValues.UB,                                     // Upper
2701       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2702       Chunk                                                  // Chunk
2703   };
2704   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2705 }
2706 
2707 static void emitForStaticInitCall(
2708     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2709     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2710     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2711     const CGOpenMPRuntime::StaticRTInput &Values) {
2712   if (!CGF.HaveInsertPoint())
2713     return;
2714 
2715   assert(!Values.Ordered);
2716   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2717          Schedule == OMP_sch_static_balanced_chunked ||
2718          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2719          Schedule == OMP_dist_sch_static ||
2720          Schedule == OMP_dist_sch_static_chunked);
2721 
2722   // Call __kmpc_for_static_init(
2723   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2724   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2725   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2726   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2727   llvm::Value *Chunk = Values.Chunk;
2728   if (Chunk == nullptr) {
2729     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2730             Schedule == OMP_dist_sch_static) &&
2731            "expected static non-chunked schedule");
2732     // If the Chunk was not specified in the clause - use default value 1.
2733     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2734   } else {
2735     assert((Schedule == OMP_sch_static_chunked ||
2736             Schedule == OMP_sch_static_balanced_chunked ||
2737             Schedule == OMP_ord_static_chunked ||
2738             Schedule == OMP_dist_sch_static_chunked) &&
2739            "expected static chunked schedule");
2740   }
2741   llvm::Value *Args[] = {
2742       UpdateLocation,
2743       ThreadId,
2744       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2745                                                   M2)), // Schedule type
2746       Values.IL.getPointer(),                           // &isLastIter
2747       Values.LB.getPointer(),                           // &LB
2748       Values.UB.getPointer(),                           // &UB
2749       Values.ST.getPointer(),                           // &Stride
2750       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2751       Chunk                                             // Chunk
2752   };
2753   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2754 }
2755 
2756 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2757                                         SourceLocation Loc,
2758                                         OpenMPDirectiveKind DKind,
2759                                         const OpenMPScheduleTy &ScheduleKind,
2760                                         const StaticRTInput &Values) {
2761   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2762       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2763   assert(isOpenMPWorksharingDirective(DKind) &&
2764          "Expected loop-based or sections-based directive.");
2765   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2766                                              isOpenMPLoopDirective(DKind)
2767                                                  ? OMP_IDENT_WORK_LOOP
2768                                                  : OMP_IDENT_WORK_SECTIONS);
2769   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2770   llvm::FunctionCallee StaticInitFunction =
2771       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2772   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2773   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2774                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2775 }
2776 
2777 void CGOpenMPRuntime::emitDistributeStaticInit(
2778     CodeGenFunction &CGF, SourceLocation Loc,
2779     OpenMPDistScheduleClauseKind SchedKind,
2780     const CGOpenMPRuntime::StaticRTInput &Values) {
2781   OpenMPSchedType ScheduleNum =
2782       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2783   llvm::Value *UpdatedLocation =
2784       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2785   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2786   llvm::FunctionCallee StaticInitFunction =
2787       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2788   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2789                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2790                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2791 }
2792 
2793 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2794                                           SourceLocation Loc,
2795                                           OpenMPDirectiveKind DKind) {
2796   if (!CGF.HaveInsertPoint())
2797     return;
2798   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2799   llvm::Value *Args[] = {
2800       emitUpdateLocation(CGF, Loc,
2801                          isOpenMPDistributeDirective(DKind)
2802                              ? OMP_IDENT_WORK_DISTRIBUTE
2803                              : isOpenMPLoopDirective(DKind)
2804                                    ? OMP_IDENT_WORK_LOOP
2805                                    : OMP_IDENT_WORK_SECTIONS),
2806       getThreadID(CGF, Loc)};
2807   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2808   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2809                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2810                       Args);
2811 }
2812 
2813 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2814                                                  SourceLocation Loc,
2815                                                  unsigned IVSize,
2816                                                  bool IVSigned) {
2817   if (!CGF.HaveInsertPoint())
2818     return;
2819   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2820   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2821   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2822 }
2823 
2824 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2825                                           SourceLocation Loc, unsigned IVSize,
2826                                           bool IVSigned, Address IL,
2827                                           Address LB, Address UB,
2828                                           Address ST) {
2829   // Call __kmpc_dispatch_next(
2830   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2831   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2832   //          kmp_int[32|64] *p_stride);
2833   llvm::Value *Args[] = {
2834       emitUpdateLocation(CGF, Loc),
2835       getThreadID(CGF, Loc),
2836       IL.getPointer(), // &isLastIter
2837       LB.getPointer(), // &Lower
2838       UB.getPointer(), // &Upper
2839       ST.getPointer()  // &Stride
2840   };
2841   llvm::Value *Call =
2842       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2843   return CGF.EmitScalarConversion(
2844       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2845       CGF.getContext().BoolTy, Loc);
2846 }
2847 
2848 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2849                                            llvm::Value *NumThreads,
2850                                            SourceLocation Loc) {
2851   if (!CGF.HaveInsertPoint())
2852     return;
2853   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2854   llvm::Value *Args[] = {
2855       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2856       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2857   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2858                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2859                       Args);
2860 }
2861 
2862 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2863                                          ProcBindKind ProcBind,
2864                                          SourceLocation Loc) {
2865   if (!CGF.HaveInsertPoint())
2866     return;
2867   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2868   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2869   llvm::Value *Args[] = {
2870       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2871       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2872   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2873                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2874                       Args);
2875 }
2876 
2877 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2878                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2879   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2880     OMPBuilder.CreateFlush(CGF.Builder);
2881   } else {
2882     if (!CGF.HaveInsertPoint())
2883       return;
2884     // Build call void __kmpc_flush(ident_t *loc)
2885     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2886                             CGM.getModule(), OMPRTL___kmpc_flush),
2887                         emitUpdateLocation(CGF, Loc));
2888   }
2889 }
2890 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// The enumerators carry no explicit values, so they number the fields
/// consecutively starting at 0 in declaration order.
// NOTE(review): the order presumably has to mirror the field order of the
// kmp_task_t record built by createKmpTaskTRecordDecl (shareds, routine,
// part_id, data1, data2, ...) - confirm before reordering anything here.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  /// (The 'data1' slot of kmp_task_t.)
  Data1,
  /// Task priority.
  /// (The 'data2' slot of kmp_task_t.)
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2916 
2917 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2918   return OffloadEntriesTargetRegion.empty() &&
2919          OffloadEntriesDeviceGlobalVar.empty();
2920 }
2921 
2922 /// Initialize target region entry.
2923 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2924     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2925                                     StringRef ParentName, unsigned LineNum,
2926                                     unsigned Order) {
2927   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2928                                              "only required for the device "
2929                                              "code generation.");
2930   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2931       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2932                                    OMPTargetRegionEntryTargetRegion);
2933   ++OffloadingEntriesNum;
2934 }
2935 
2936 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2937     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2938                                   StringRef ParentName, unsigned LineNum,
2939                                   llvm::Constant *Addr, llvm::Constant *ID,
2940                                   OMPTargetRegionEntryKind Flags) {
2941   // If we are emitting code for a target, the entry is already initialized,
2942   // only has to be registered.
2943   if (CGM.getLangOpts().OpenMPIsDevice) {
2944     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
2945       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2946           DiagnosticsEngine::Error,
2947           "Unable to find target region on line '%0' in the device code.");
2948       CGM.getDiags().Report(DiagID) << LineNum;
2949       return;
2950     }
2951     auto &Entry =
2952         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2953     assert(Entry.isValid() && "Entry not initialized!");
2954     Entry.setAddress(Addr);
2955     Entry.setID(ID);
2956     Entry.setFlags(Flags);
2957   } else {
2958     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
2959     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2960     ++OffloadingEntriesNum;
2961   }
2962 }
2963 
2964 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2965     unsigned DeviceID, unsigned FileID, StringRef ParentName,
2966     unsigned LineNum) const {
2967   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2968   if (PerDevice == OffloadEntriesTargetRegion.end())
2969     return false;
2970   auto PerFile = PerDevice->second.find(FileID);
2971   if (PerFile == PerDevice->second.end())
2972     return false;
2973   auto PerParentName = PerFile->second.find(ParentName);
2974   if (PerParentName == PerFile->second.end())
2975     return false;
2976   auto PerLine = PerParentName->second.find(LineNum);
2977   if (PerLine == PerParentName->second.end())
2978     return false;
2979   // Fail if this entry is already registered.
2980   if (PerLine->second.getAddress() || PerLine->second.getID())
2981     return false;
2982   return true;
2983 }
2984 
2985 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2986     const OffloadTargetRegionEntryInfoActTy &Action) {
2987   // Scan all target region entries and perform the provided action.
2988   for (const auto &D : OffloadEntriesTargetRegion)
2989     for (const auto &F : D.second)
2990       for (const auto &P : F.second)
2991         for (const auto &L : P.second)
2992           Action(D.first, F.first, P.first(), L.first, L.second);
2993 }
2994 
2995 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2996     initializeDeviceGlobalVarEntryInfo(StringRef Name,
2997                                        OMPTargetGlobalVarEntryKind Flags,
2998                                        unsigned Order) {
2999   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3000                                              "only required for the device "
3001                                              "code generation.");
3002   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3003   ++OffloadingEntriesNum;
3004 }
3005 
3006 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3007     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3008                                      CharUnits VarSize,
3009                                      OMPTargetGlobalVarEntryKind Flags,
3010                                      llvm::GlobalValue::LinkageTypes Linkage) {
3011   if (CGM.getLangOpts().OpenMPIsDevice) {
3012     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3013     assert(Entry.isValid() && Entry.getFlags() == Flags &&
3014            "Entry not initialized!");
3015     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3016            "Resetting with the new address.");
3017     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3018       if (Entry.getVarSize().isZero()) {
3019         Entry.setVarSize(VarSize);
3020         Entry.setLinkage(Linkage);
3021       }
3022       return;
3023     }
3024     Entry.setVarSize(VarSize);
3025     Entry.setLinkage(Linkage);
3026     Entry.setAddress(Addr);
3027   } else {
3028     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3029       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3030       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3031              "Entry not initialized!");
3032       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3033              "Resetting with the new address.");
3034       if (Entry.getVarSize().isZero()) {
3035         Entry.setVarSize(VarSize);
3036         Entry.setLinkage(Linkage);
3037       }
3038       return;
3039     }
3040     OffloadEntriesDeviceGlobalVar.try_emplace(
3041         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3042     ++OffloadingEntriesNum;
3043   }
3044 }
3045 
3046 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3047     actOnDeviceGlobalVarEntriesInfo(
3048         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3049   // Scan all target region entries and perform the provided action.
3050   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3051     Action(E.getKey(), E.getValue());
3052 }
3053 
3054 void CGOpenMPRuntime::createOffloadEntry(
3055     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3056     llvm::GlobalValue::LinkageTypes Linkage) {
3057   StringRef Name = Addr->getName();
3058   llvm::Module &M = CGM.getModule();
3059   llvm::LLVMContext &C = M.getContext();
3060 
3061   // Create constant string with the name.
3062   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3063 
3064   std::string StringName = getName({"omp_offloading", "entry_name"});
3065   auto *Str = new llvm::GlobalVariable(
3066       M, StrPtrInit->getType(), /*isConstant=*/true,
3067       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3068   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3069 
3070   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
3071                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
3072                             llvm::ConstantInt::get(CGM.SizeTy, Size),
3073                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3074                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3075   std::string EntryName = getName({"omp_offloading", "entry", ""});
3076   llvm::GlobalVariable *Entry = createGlobalStruct(
3077       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3078       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3079 
3080   // The entry has to be created in the section the linker expects it to be.
3081   Entry->setSection("omp_offloading_entries");
3082 }
3083 
/// Emits the "omp_offload.info" named metadata describing every recorded
/// offload entry and then creates the offload entry globals for them.
///
/// The function proceeds in three steps:
///  1. one metadata node is added per target region entry;
///  2. one metadata node is added per device global variable entry;
///  3. the entries, ordered by their creation order, are turned into offload
///     entry globals via createOffloadEntry(), with diagnostics for entries
///     lacking an address or ID.
/// Nothing is done in simd-only mode or when no entries were recorded.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Both arrays are indexed by an entry's order number and are pre-sized to
  // the number of entries reported by the manager.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a source location for diagnostics by searching the known
        // files for the one with a matching device/file ID. Loc stays invalid
        // when no file matches.
        // Note: the end iterator 'E' declared in the loop header shadows the
        // entry parameter 'E' for the duration of the loop only.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        // Global variable entries carry no source location.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Walk the entries in creation order and emit an offload entry global for
  // each one that is complete. The tuple elements are: <0> the entry, <1> its
  // source location, <2> the parent function or variable name.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      // Target regions are emitted with size 0.
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      // Inside the switch, 'continue' skips this entry altogether while
      // 'break' falls through to the createOffloadEntry() call below.
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // No entry is emitted on the device when unified shared memory is
        // required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // A 'link' entry is expected to have an address exactly when
        // compiling for the host.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          // Unlike the 'to' case, this diagnostic is reported without a
          // source location or variable name.
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For global variables the address doubles as the entry ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3257 
3258 /// Loads all the offload entries information from the host IR
3259 /// metadata.
3260 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3261   // If we are in target mode, load the metadata from the host IR. This code has
3262   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3263 
3264   if (!CGM.getLangOpts().OpenMPIsDevice)
3265     return;
3266 
3267   if (CGM.getLangOpts().OMPHostIRFile.empty())
3268     return;
3269 
3270   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3271   if (auto EC = Buf.getError()) {
3272     CGM.getDiags().Report(diag::err_cannot_open_file)
3273         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3274     return;
3275   }
3276 
3277   llvm::LLVMContext C;
3278   auto ME = expectedToErrorOrAndEmitErrors(
3279       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3280 
3281   if (auto EC = ME.getError()) {
3282     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3283         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3284     CGM.getDiags().Report(DiagID)
3285         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3286     return;
3287   }
3288 
3289   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3290   if (!MD)
3291     return;
3292 
3293   for (llvm::MDNode *MN : MD->operands()) {
3294     auto &&GetMDInt = [MN](unsigned Idx) {
3295       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3296       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3297     };
3298 
3299     auto &&GetMDString = [MN](unsigned Idx) {
3300       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3301       return V->getString();
3302     };
3303 
3304     switch (GetMDInt(0)) {
3305     default:
3306       llvm_unreachable("Unexpected metadata!");
3307       break;
3308     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3309         OffloadingEntryInfoTargetRegion:
3310       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3311           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3312           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3313           /*Order=*/GetMDInt(5));
3314       break;
3315     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3316         OffloadingEntryInfoDeviceGlobalVar:
3317       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3318           /*MangledName=*/GetMDString(1),
3319           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3320               /*Flags=*/GetMDInt(2)),
3321           /*Order=*/GetMDInt(3));
3322       break;
3323     }
3324   }
3325 }
3326 
3327 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3328   if (!KmpRoutineEntryPtrTy) {
3329     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3330     ASTContext &C = CGM.getContext();
3331     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3332     FunctionProtoType::ExtProtoInfo EPI;
3333     KmpRoutineEntryPtrQTy = C.getPointerType(
3334         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3335     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3336   }
3337 }
3338 
3339 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3340   // Make sure the type of the entry is already created. This is the type we
3341   // have to create:
3342   // struct __tgt_offload_entry{
3343   //   void      *addr;       // Pointer to the offload entry info.
3344   //                          // (function or global)
3345   //   char      *name;       // Name of the function or global.
3346   //   size_t     size;       // Size of the entry info (0 if it a function).
3347   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3348   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3349   // };
3350   if (TgtOffloadEntryQTy.isNull()) {
3351     ASTContext &C = CGM.getContext();
3352     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3353     RD->startDefinition();
3354     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3355     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3356     addFieldToRecordDecl(C, RD, C.getSizeType());
3357     addFieldToRecordDecl(
3358         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3359     addFieldToRecordDecl(
3360         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3361     RD->completeDefinition();
3362     RD->addAttr(PackedAttr::CreateImplicit(C));
3363     TgtOffloadEntryQTy = C.getRecordType(RD);
3364   }
3365   return TgtOffloadEntryQTy;
3366 }
3367 
3368 namespace {
3369 struct PrivateHelpersTy {
3370   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3371                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3372       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3373         PrivateElemInit(PrivateElemInit) {}
3374   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3375   const Expr *OriginalRef = nullptr;
3376   const VarDecl *Original = nullptr;
3377   const VarDecl *PrivateCopy = nullptr;
3378   const VarDecl *PrivateElemInit = nullptr;
3379   bool isLocalPrivate() const {
3380     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3381   }
3382 };
3383 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3384 } // anonymous namespace
3385 
3386 static bool isAllocatableDecl(const VarDecl *VD) {
3387   const VarDecl *CVD = VD->getCanonicalDecl();
3388   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3389     return false;
3390   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3391   // Use the default allocation.
3392   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3393             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3394            !AA->getAllocator());
3395 }
3396 
3397 static RecordDecl *
3398 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3399   if (!Privates.empty()) {
3400     ASTContext &C = CGM.getContext();
3401     // Build struct .kmp_privates_t. {
3402     //         /*  private vars  */
3403     //       };
3404     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3405     RD->startDefinition();
3406     for (const auto &Pair : Privates) {
3407       const VarDecl *VD = Pair.second.Original;
3408       QualType Type = VD->getType().getNonReferenceType();
3409       // If the private variable is a local variable with lvalue ref type,
3410       // allocate the pointer instead of the pointee type.
3411       if (Pair.second.isLocalPrivate()) {
3412         if (VD->getType()->isLValueReferenceType())
3413           Type = C.getPointerType(Type);
3414         if (isAllocatableDecl(VD))
3415           Type = C.getPointerType(Type);
3416       }
3417       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3418       if (VD->hasAttrs()) {
3419         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3420              E(VD->getAttrs().end());
3421              I != E; ++I)
3422           FD->addAttr(*I);
3423       }
3424     }
3425     RD->completeDefinition();
3426     return RD;
3427   }
3428   return nullptr;
3429 }
3430 
3431 static RecordDecl *
3432 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3433                          QualType KmpInt32Ty,
3434                          QualType KmpRoutineEntryPointerQTy) {
3435   ASTContext &C = CGM.getContext();
3436   // Build struct kmp_task_t {
3437   //         void *              shareds;
3438   //         kmp_routine_entry_t routine;
3439   //         kmp_int32           part_id;
3440   //         kmp_cmplrdata_t data1;
3441   //         kmp_cmplrdata_t data2;
3442   // For taskloops additional fields:
3443   //         kmp_uint64          lb;
3444   //         kmp_uint64          ub;
3445   //         kmp_int64           st;
3446   //         kmp_int32           liter;
3447   //         void *              reductions;
3448   //       };
3449   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3450   UD->startDefinition();
3451   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3452   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3453   UD->completeDefinition();
3454   QualType KmpCmplrdataTy = C.getRecordType(UD);
3455   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3456   RD->startDefinition();
3457   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3458   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3459   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3460   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3461   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3462   if (isOpenMPTaskLoopDirective(Kind)) {
3463     QualType KmpUInt64Ty =
3464         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3465     QualType KmpInt64Ty =
3466         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3467     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3468     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3469     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3470     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3471     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3472   }
3473   RD->completeDefinition();
3474   return RD;
3475 }
3476 
3477 static RecordDecl *
3478 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3479                                      ArrayRef<PrivateDataTy> Privates) {
3480   ASTContext &C = CGM.getContext();
3481   // Build struct kmp_task_t_with_privates {
3482   //         kmp_task_t task_data;
3483   //         .kmp_privates_t. privates;
3484   //       };
3485   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3486   RD->startDefinition();
3487   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3488   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3489     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3490   RD->completeDefinition();
3491   return RD;
3492 }
3493 
3494 /// Emit a proxy function which accepts kmp_task_t as the second
3495 /// argument.
3496 /// \code
3497 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3498 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3499 ///   For taskloops:
3500 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3501 ///   tt->reductions, tt->shareds);
3502 ///   return 0;
3503 /// }
3504 /// \endcode
3505 static llvm::Function *
3506 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3507                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3508                       QualType KmpTaskTWithPrivatesPtrQTy,
3509                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3510                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3511                       llvm::Value *TaskPrivatesMap) {
3512   ASTContext &C = CGM.getContext();
3513   FunctionArgList Args;
3514   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3515                             ImplicitParamDecl::Other);
3516   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3517                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3518                                 ImplicitParamDecl::Other);
3519   Args.push_back(&GtidArg);
3520   Args.push_back(&TaskTypeArg);
3521   const auto &TaskEntryFnInfo =
3522       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3523   llvm::FunctionType *TaskEntryTy =
3524       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3525   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3526   auto *TaskEntry = llvm::Function::Create(
3527       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3528   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3529   TaskEntry->setDoesNotRecurse();
3530   CodeGenFunction CGF(CGM);
3531   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3532                     Loc, Loc);
3533 
3534   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3535   // tt,
3536   // For taskloops:
3537   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3538   // tt->task_data.shareds);
3539   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3540       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3541   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3542       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3543       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3544   const auto *KmpTaskTWithPrivatesQTyRD =
3545       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3546   LValue Base =
3547       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3548   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3549   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3550   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3551   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3552 
3553   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3554   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3555   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3556       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3557       CGF.ConvertTypeForMem(SharedsPtrTy));
3558 
3559   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3560   llvm::Value *PrivatesParam;
3561   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3562     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3563     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3564         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3565   } else {
3566     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3567   }
3568 
3569   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3570                                TaskPrivatesMap,
3571                                CGF.Builder
3572                                    .CreatePointerBitCastOrAddrSpaceCast(
3573                                        TDBase.getAddress(CGF), CGF.VoidPtrTy)
3574                                    .getPointer()};
3575   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3576                                           std::end(CommonArgs));
3577   if (isOpenMPTaskLoopDirective(Kind)) {
3578     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3579     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3580     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3581     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3582     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3583     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3584     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3585     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3586     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3587     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3588     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3589     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3590     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3591     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3592     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3593     CallArgs.push_back(LBParam);
3594     CallArgs.push_back(UBParam);
3595     CallArgs.push_back(StParam);
3596     CallArgs.push_back(LIParam);
3597     CallArgs.push_back(RParam);
3598   }
3599   CallArgs.push_back(SharedsParam);
3600 
3601   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3602                                                   CallArgs);
3603   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3604                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3605   CGF.FinishFunction();
3606   return TaskEntry;
3607 }
3608 
3609 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3610                                             SourceLocation Loc,
3611                                             QualType KmpInt32Ty,
3612                                             QualType KmpTaskTWithPrivatesPtrQTy,
3613                                             QualType KmpTaskTWithPrivatesQTy) {
3614   ASTContext &C = CGM.getContext();
3615   FunctionArgList Args;
3616   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3617                             ImplicitParamDecl::Other);
3618   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3619                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3620                                 ImplicitParamDecl::Other);
3621   Args.push_back(&GtidArg);
3622   Args.push_back(&TaskTypeArg);
3623   const auto &DestructorFnInfo =
3624       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3625   llvm::FunctionType *DestructorFnTy =
3626       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3627   std::string Name =
3628       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3629   auto *DestructorFn =
3630       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3631                              Name, &CGM.getModule());
3632   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3633                                     DestructorFnInfo);
3634   DestructorFn->setDoesNotRecurse();
3635   CodeGenFunction CGF(CGM);
3636   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3637                     Args, Loc, Loc);
3638 
3639   LValue Base = CGF.EmitLoadOfPointerLValue(
3640       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3641       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3642   const auto *KmpTaskTWithPrivatesQTyRD =
3643       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3644   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3645   Base = CGF.EmitLValueForField(Base, *FI);
3646   for (const auto *Field :
3647        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3648     if (QualType::DestructionKind DtorKind =
3649             Field->getType().isDestructedType()) {
3650       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3651       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3652     }
3653   }
3654   CGF.FinishFunction();
3655   return DestructorFn;
3656 }
3657 
3658 /// Emit a privates mapping function for correct handling of private and
3659 /// firstprivate variables.
3660 /// \code
3661 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3662 /// **noalias priv1,...,  <tyn> **noalias privn) {
3663 ///   *priv1 = &.privates.priv1;
3664 ///   ...;
3665 ///   *privn = &.privates.privn;
3666 /// }
3667 /// \endcode
3668 static llvm::Value *
3669 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3670                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3671                                ArrayRef<PrivateDataTy> Privates) {
3672   ASTContext &C = CGM.getContext();
3673   FunctionArgList Args;
3674   ImplicitParamDecl TaskPrivatesArg(
3675       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3676       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3677       ImplicitParamDecl::Other);
3678   Args.push_back(&TaskPrivatesArg);
3679   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3680   unsigned Counter = 1;
3681   for (const Expr *E : Data.PrivateVars) {
3682     Args.push_back(ImplicitParamDecl::Create(
3683         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3684         C.getPointerType(C.getPointerType(E->getType()))
3685             .withConst()
3686             .withRestrict(),
3687         ImplicitParamDecl::Other));
3688     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3689     PrivateVarsPos[VD] = Counter;
3690     ++Counter;
3691   }
3692   for (const Expr *E : Data.FirstprivateVars) {
3693     Args.push_back(ImplicitParamDecl::Create(
3694         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3695         C.getPointerType(C.getPointerType(E->getType()))
3696             .withConst()
3697             .withRestrict(),
3698         ImplicitParamDecl::Other));
3699     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3700     PrivateVarsPos[VD] = Counter;
3701     ++Counter;
3702   }
3703   for (const Expr *E : Data.LastprivateVars) {
3704     Args.push_back(ImplicitParamDecl::Create(
3705         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3706         C.getPointerType(C.getPointerType(E->getType()))
3707             .withConst()
3708             .withRestrict(),
3709         ImplicitParamDecl::Other));
3710     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3711     PrivateVarsPos[VD] = Counter;
3712     ++Counter;
3713   }
3714   for (const VarDecl *VD : Data.PrivateLocals) {
3715     QualType Ty = VD->getType().getNonReferenceType();
3716     if (VD->getType()->isLValueReferenceType())
3717       Ty = C.getPointerType(Ty);
3718     if (isAllocatableDecl(VD))
3719       Ty = C.getPointerType(Ty);
3720     Args.push_back(ImplicitParamDecl::Create(
3721         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3722         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3723         ImplicitParamDecl::Other));
3724     PrivateVarsPos[VD] = Counter;
3725     ++Counter;
3726   }
3727   const auto &TaskPrivatesMapFnInfo =
3728       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3729   llvm::FunctionType *TaskPrivatesMapTy =
3730       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3731   std::string Name =
3732       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3733   auto *TaskPrivatesMap = llvm::Function::Create(
3734       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3735       &CGM.getModule());
3736   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3737                                     TaskPrivatesMapFnInfo);
3738   if (CGM.getLangOpts().Optimize) {
3739     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3740     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3741     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3742   }
3743   CodeGenFunction CGF(CGM);
3744   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3745                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3746 
3747   // *privi = &.privates.privi;
3748   LValue Base = CGF.EmitLoadOfPointerLValue(
3749       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3750       TaskPrivatesArg.getType()->castAs<PointerType>());
3751   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3752   Counter = 0;
3753   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3754     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3755     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3756     LValue RefLVal =
3757         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3758     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3759         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3760     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3761     ++Counter;
3762   }
3763   CGF.FinishFunction();
3764   return TaskPrivatesMap;
3765 }
3766 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lockstep with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD, addressed through \p TDBase)
/// and emits the initializer of each private copy into its field.
///
/// \param KmpTaskSharedsPtr Address of the shareds block; only used (through
/// \c SrcBase) when the source of a firstprivate has to be read from it.
/// \param ForDup True when called while emitting the task duplication
/// function; in that mode only non-trivial constructor initializers are
/// emitted and firstprivate sources are read from the shareds block.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of the task record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Pick the captured statement that belongs to the task (or taskloop) region
  // of the directive.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself;
  // it is advanced once per entry of Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Outside of the duplication function every initializer is emitted; inside
    // it only non-trivial constructor calls are.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit means the initializer reads from the
      // original variable, so its address has to be found first.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          // Not captured: the variable is addressed directly as a local.
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // In the duplication function the source lives in the shareds block;
          // the lvalue is rebuilt with the declared alignment of the original
          // variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Variable captured by an enclosing lambda, or code emitted inside a
          // block: evaluate the original reference as is.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          // The inlined-region RAII object is active only while the original
          // reference is evaluated.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // While the initializer is emitted, Elem stands for the
                  // current source element.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: Elem stands for the original variable
          // while the initializer is emitted into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // The initializer does not depend on the original variable.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3888 
3889 /// Check if duplication function is required for taskloops.
3890 static bool checkInitIsRequired(CodeGenFunction &CGF,
3891                                 ArrayRef<PrivateDataTy> Privates) {
3892   bool InitRequired = false;
3893   for (const PrivateDataTy &Pair : Privates) {
3894     if (Pair.second.isLocalPrivate())
3895       continue;
3896     const VarDecl *VD = Pair.second.PrivateCopy;
3897     const Expr *Init = VD->getAnyInitializer();
3898     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3899                                     !CGF.isTrivialInitializer(Init));
3900     if (InitRequired)
3901       break;
3902   }
3903   return InitRequired;
3904 }
3905 
3906 
3907 /// Emit task_dup function (for initialization of
3908 /// private/firstprivate/lastprivate vars and last_iter flag)
3909 /// \code
3910 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3911 /// lastpriv) {
3912 /// // setup lastprivate flag
3913 ///    task_dst->last = lastpriv;
3914 /// // could be constructor calls here...
3915 /// }
3916 /// \endcode
3917 static llvm::Value *
3918 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3919                     const OMPExecutableDirective &D,
3920                     QualType KmpTaskTWithPrivatesPtrQTy,
3921                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3922                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3923                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3924                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3925   ASTContext &C = CGM.getContext();
3926   FunctionArgList Args;
3927   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3928                            KmpTaskTWithPrivatesPtrQTy,
3929                            ImplicitParamDecl::Other);
3930   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3931                            KmpTaskTWithPrivatesPtrQTy,
3932                            ImplicitParamDecl::Other);
3933   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3934                                 ImplicitParamDecl::Other);
3935   Args.push_back(&DstArg);
3936   Args.push_back(&SrcArg);
3937   Args.push_back(&LastprivArg);
3938   const auto &TaskDupFnInfo =
3939       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3940   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3941   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3942   auto *TaskDup = llvm::Function::Create(
3943       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3944   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3945   TaskDup->setDoesNotRecurse();
3946   CodeGenFunction CGF(CGM);
3947   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3948                     Loc);
3949 
3950   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3951       CGF.GetAddrOfLocalVar(&DstArg),
3952       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3953   // task_dst->liter = lastpriv;
3954   if (WithLastIter) {
3955     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3956     LValue Base = CGF.EmitLValueForField(
3957         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3958     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3959     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3960         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3961     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3962   }
3963 
3964   // Emit initial values for private copies (if any).
3965   assert(!Privates.empty());
3966   Address KmpTaskSharedsPtr = Address::invalid();
3967   if (!Data.FirstprivateVars.empty()) {
3968     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3969         CGF.GetAddrOfLocalVar(&SrcArg),
3970         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3971     LValue Base = CGF.EmitLValueForField(
3972         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3973     KmpTaskSharedsPtr = Address(
3974         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3975                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3976                                                   KmpTaskTShareds)),
3977                              Loc),
3978         CGM.getNaturalTypeAlignment(SharedsTy));
3979   }
3980   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3981                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3982   CGF.FinishFunction();
3983   return TaskDup;
3984 }
3985 
3986 /// Checks if destructor function is required to be generated.
3987 /// \return true if cleanups are required, false otherwise.
3988 static bool
3989 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3990                          ArrayRef<PrivateDataTy> Privates) {
3991   for (const PrivateDataTy &P : Privates) {
3992     if (P.second.isLocalPrivate())
3993       continue;
3994     QualType Ty = P.second.Original->getType().getNonReferenceType();
3995     if (Ty.isDestructedType())
3996       return true;
3997   }
3998   return false;
3999 }
4000 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// The constructor opens one loop per iterator of the expression (the first
/// iterator being the outermost loop) and leaves the insertion point inside
/// the innermost body; the destructor closes the loops again, innermost
/// first. Code emitted while an object of this class is alive therefore ends
/// up inside the loop nest. A null expression makes both the constructor and
/// the destructor do nothing beyond the base class work.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  /// The iterator expression, may be null.
  const OMPIteratorExpr *E = nullptr;
  /// Per-iterator jump destination of the loop condition block ("iter.cont").
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  /// Per-iterator jump destination of the loop exit block ("iter.exit").
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Privatize the iterator and counter variables of \p E and emit the loop
  /// headers.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Upper bounds (iteration counts) are evaluated up front, before any of
    // the variables is privatized.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      // Both the iterator variable and its helper counter get a fresh
      // temporary as private storage.
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Emit the header of each loop; the loops nest in iterator order.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      // The destinations are remembered so that the destructor can branch back
      // to the condition and place the exit block.
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The signedness of the comparison follows the type of the counter.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Close the loops opened by the constructor, innermost loop first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // I runs from numOfIterators() down to 1 so that the unsigned index never
    // has to go below zero; the iterator handled is I - 1.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      // IsFinished is only passed as true for the exit block of the outermost
      // loop.
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4079 
4080 static std::pair<llvm::Value *, llvm::Value *>
4081 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4082   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4083   llvm::Value *Addr;
4084   if (OASE) {
4085     const Expr *Base = OASE->getBase();
4086     Addr = CGF.EmitScalarExpr(Base);
4087   } else {
4088     Addr = CGF.EmitLValue(E).getPointer(CGF);
4089   }
4090   llvm::Value *SizeVal;
4091   QualType Ty = E->getType();
4092   if (OASE) {
4093     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4094     for (const Expr *SE : OASE->getDimensions()) {
4095       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4096       Sz = CGF.EmitScalarConversion(
4097           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4098       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4099     }
4100   } else if (const auto *ASE =
4101                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4102     LValue UpAddrLVal =
4103         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4104     llvm::Value *UpAddr =
4105         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4106     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4107     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4108     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4109   } else {
4110     SizeVal = CGF.getTypeSize(Ty);
4111   }
4112   return std::make_pair(Addr, SizeVal);
4113 }
4114 
4115 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4116 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4117   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4118   if (KmpTaskAffinityInfoTy.isNull()) {
4119     RecordDecl *KmpAffinityInfoRD =
4120         C.buildImplicitRecord("kmp_task_affinity_info_t");
4121     KmpAffinityInfoRD->startDefinition();
4122     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4123     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4124     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4125     KmpAffinityInfoRD->completeDefinition();
4126     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4127   }
4128 }
4129 
/// Emits the allocation and the initialization of the runtime task descriptor
/// for the task-generating directive \p D.
///
/// The steps performed, in order:
///  - collect the private, firstprivate, lastprivate and local private
///    variables from \p Data and sort them by decreasing alignment;
///  - build (or reuse the cached) kmp_task_t record type and the per-task
///    record that also holds the privates;
///  - emit the privates mapping function (if there are privates) and the
///    proxy task entry function;
///  - call __kmpc_omp_task_alloc, or __kmpc_omp_target_task_alloc when the
///    directive has a 'nowait' clause;
///  - handle the 'detach' and 'affinity' clauses;
///  - copy the shareds into the new task and initialize the private copies;
///  - store the destructors function and the priority into the descriptor.
///
/// \param CGF Function in which the code is emitted.
/// \param Loc Location used for the runtime calls and loads emitted here.
/// \param D Directive the task is created for; its kind selects between the
/// taskloop and the plain task flavour of kmp_task_t, and its clauses
/// ('nowait', 'device', 'detach', 'affinity') are inspected.
/// \param TaskFunction Outlined task function; the type of its fourth
/// parameter is used as the type of the privates mapping function pointer.
/// \param SharedsTy Record type of the shareds.
/// \param Shareds Address of the shareds that are copied into the task.
/// \param Data Task data (privates, 'final', 'priority', 'tied', ...).
/// \returns A TaskResultTy with NewTask, TaskEntry, NewTaskNewTaskTTy, TDBase
/// and KmpTaskTQTyRD set; TaskDupFn is set only for taskloop directives that
/// have lastprivates or privates requiring initialization.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // The *Copies (and FirstprivateInits) lists are walked in lock-step with the
  // corresponding *Vars lists.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Local privates: allocatable declarations are recorded with pointer
  // alignment, all others with the alignment of the declaration itself.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Largest alignment first; the stable sort keeps the collection order of
  // equally aligned privates.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives and all other directives use separately cached types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The type of the privates map is taken from the fourth parameter of the
  // outlined task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    // Second field of the per-task record; its type is what the mapping
    // function is built for (presumably the privates sub-record - confirm
    // against createKmpTaskTWithPrivatesRecordDecl).
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null mapping function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  // Flags known at compile time are accumulated in 'Flags'; the 'final' flag
  // may depend on a runtime condition and is merged in below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // If Data.Final carries a value, select the flag at run time; otherwise use
  // the compile-time boolean stored alongside it.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a 'nowait' clause the target flavour of the allocation entry point is
  // used; it takes the device id as an additional trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    // Note: this 'C' is the clause and shadows the ASTContext 'C' above.
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    // Note: this 'Loc' is the emitted location value for the clause and
    // shadows the SourceLocation parameter 'Loc' inside this block.
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    // Convert the returned pointer to the type of the event handler variable
    // before storing it.
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumAffinities counts the items of clauses without an iterator modifier
    // (known at compile time); NumOfElements is the runtime part contributed
    // by clauses with an iterator modifier.
    // NOTE(review): the runtime part is the product of the upper bounds of
    // all iterators of all such clauses; it is neither multiplied by the
    // clause's varlist_size() nor summed per clause, while the fill loop below
    // stores one entry per list item per iteration. Looks like the array can
    // be undersized for multi-item or multiple iterator clauses - confirm.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // The element count is only known at run time: emit a variable-length
      // array whose size expression is bound to the computed value.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      // The runtime entry point takes the count as a 32-bit integer.
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // All counts are compile-time constants: use a fixed-size temporary.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    // Only the base address and the length of each entry are written in this
    // function; the flags field is not stored.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Entries produced inside iterator loops need a position that is tracked
    // at run time; it starts right after the statically placed entries.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // Everything emitted while IteratorScope is alive is placed inside the
      // loop nest generated for the iterator modifier.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // Advance the runtime position to the next free entry.
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task as the per-task record; its first field is used
  // as the kmp_task_t descriptor (TDBase).
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  // KmpTaskSharedsPtr stays invalid when the shareds record has no fields.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task duplication function is only emitted for taskloops that have
    // lastprivates or privates that require initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  // The priority goes into the Data2 field, addressed through the same union
  // declaration that was looked up for Data1.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4519 
namespace {
/// Dependence kinds in the numeric encoding passed to the runtime library.
enum RTLDependenceKindTy {
  /// 'in' dependence.
  DepIn = 0x1,
  /// 'out' and 'inout' dependences (they share one encoding).
  DepInOut = 0x3,
  /// 'mutexinoutset' dependence.
  DepMutexInOutSet = 0x4
};
/// Positions of the fields inside the kmp_depend_info record.
enum RTLDependInfoFieldsTy {
  BaseAddr = 0,
  Len = 1,
  Flags = 2
};
} // namespace
4530 
4531 /// Translates internal dependency kind into the runtime kind.
4532 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4533   RTLDependenceKindTy DepKind;
4534   switch (K) {
4535   case OMPC_DEPEND_in:
4536     DepKind = DepIn;
4537     break;
4538   // Out and InOut dependencies must use the same code.
4539   case OMPC_DEPEND_out:
4540   case OMPC_DEPEND_inout:
4541     DepKind = DepInOut;
4542     break;
4543   case OMPC_DEPEND_mutexinoutset:
4544     DepKind = DepMutexInOutSet;
4545     break;
4546   case OMPC_DEPEND_source:
4547   case OMPC_DEPEND_sink:
4548   case OMPC_DEPEND_depobj:
4549   case OMPC_DEPEND_unknown:
4550     llvm_unreachable("Unknown task dependence type");
4551   }
4552   return DepKind;
4553 }
4554 
4555 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4556 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4557                            QualType &FlagsTy) {
4558   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4559   if (KmpDependInfoTy.isNull()) {
4560     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4561     KmpDependInfoRD->startDefinition();
4562     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4563     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4564     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4565     KmpDependInfoRD->completeDefinition();
4566     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4567   }
4568 }
4569 
4570 std::pair<llvm::Value *, LValue>
4571 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4572                                    SourceLocation Loc) {
4573   ASTContext &C = CGM.getContext();
4574   QualType FlagsTy;
4575   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4576   RecordDecl *KmpDependInfoRD =
4577       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4578   LValue Base = CGF.EmitLoadOfPointerLValue(
4579       DepobjLVal.getAddress(CGF),
4580       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4581   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4582   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4583           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4584   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4585                             Base.getTBAAInfo());
4586   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4587       Addr.getPointer(),
4588       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4589   LValue NumDepsBase = CGF.MakeAddrLValue(
4590       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4591       Base.getBaseInfo(), Base.getTBAAInfo());
4592   // NumDeps = deps[i].base_addr;
4593   LValue BaseAddrLVal = CGF.EmitLValueForField(
4594       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4595   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4596   return std::make_pair(NumDeps, Base);
4597 }
4598 
4599 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4600                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4601                            const OMPTaskDataTy::DependData &Data,
4602                            Address DependenciesArray) {
4603   CodeGenModule &CGM = CGF.CGM;
4604   ASTContext &C = CGM.getContext();
4605   QualType FlagsTy;
4606   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4607   RecordDecl *KmpDependInfoRD =
4608       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4609   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4610 
4611   OMPIteratorGeneratorScope IteratorScope(
4612       CGF, cast_or_null<OMPIteratorExpr>(
4613                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4614                                  : nullptr));
4615   for (const Expr *E : Data.DepExprs) {
4616     llvm::Value *Addr;
4617     llvm::Value *Size;
4618     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4619     LValue Base;
4620     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4621       Base = CGF.MakeAddrLValue(
4622           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4623     } else {
4624       LValue &PosLVal = *Pos.get<LValue *>();
4625       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4626       Base = CGF.MakeAddrLValue(
4627           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
4628                   DependenciesArray.getAlignment()),
4629           KmpDependInfoTy);
4630     }
4631     // deps[i].base_addr = &<Dependencies[i].second>;
4632     LValue BaseAddrLVal = CGF.EmitLValueForField(
4633         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4634     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4635                           BaseAddrLVal);
4636     // deps[i].len = sizeof(<Dependencies[i].second>);
4637     LValue LenLVal = CGF.EmitLValueForField(
4638         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4639     CGF.EmitStoreOfScalar(Size, LenLVal);
4640     // deps[i].flags = <Dependencies[i].first>;
4641     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4642     LValue FlagsLVal = CGF.EmitLValueForField(
4643         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4644     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4645                           FlagsLVal);
4646     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4647       ++(*P);
4648     } else {
4649       LValue &PosLVal = *Pos.get<LValue *>();
4650       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4651       Idx = CGF.Builder.CreateNUWAdd(Idx,
4652                                      llvm::ConstantInt::get(Idx->getType(), 1));
4653       CGF.EmitStoreOfScalar(Idx, PosLVal);
4654     }
4655   }
4656 }
4657 
4658 static SmallVector<llvm::Value *, 4>
4659 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4660                         const OMPTaskDataTy::DependData &Data) {
4661   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4662          "Expected depobj dependecy kind.");
4663   SmallVector<llvm::Value *, 4> Sizes;
4664   SmallVector<LValue, 4> SizeLVals;
4665   ASTContext &C = CGF.getContext();
4666   QualType FlagsTy;
4667   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4668   RecordDecl *KmpDependInfoRD =
4669       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4670   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4671   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4672   {
4673     OMPIteratorGeneratorScope IteratorScope(
4674         CGF, cast_or_null<OMPIteratorExpr>(
4675                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4676                                    : nullptr));
4677     for (const Expr *E : Data.DepExprs) {
4678       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4679       LValue Base = CGF.EmitLoadOfPointerLValue(
4680           DepobjLVal.getAddress(CGF),
4681           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4682       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4683           Base.getAddress(CGF), KmpDependInfoPtrT);
4684       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4685                                 Base.getTBAAInfo());
4686       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4687           Addr.getPointer(),
4688           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4689       LValue NumDepsBase = CGF.MakeAddrLValue(
4690           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4691           Base.getBaseInfo(), Base.getTBAAInfo());
4692       // NumDeps = deps[i].base_addr;
4693       LValue BaseAddrLVal = CGF.EmitLValueForField(
4694           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4695       llvm::Value *NumDeps =
4696           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4697       LValue NumLVal = CGF.MakeAddrLValue(
4698           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4699           C.getUIntPtrType());
4700       CGF.InitTempAlloca(NumLVal.getAddress(CGF),
4701                          llvm::ConstantInt::get(CGF.IntPtrTy, 0));
4702       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4703       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4704       CGF.EmitStoreOfScalar(Add, NumLVal);
4705       SizeLVals.push_back(NumLVal);
4706     }
4707   }
4708   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4709     llvm::Value *Size =
4710         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4711     Sizes.push_back(Size);
4712   }
4713   return Sizes;
4714 }
4715 
4716 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4717                                LValue PosLVal,
4718                                const OMPTaskDataTy::DependData &Data,
4719                                Address DependenciesArray) {
4720   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4721          "Expected depobj dependecy kind.");
4722   ASTContext &C = CGF.getContext();
4723   QualType FlagsTy;
4724   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4725   RecordDecl *KmpDependInfoRD =
4726       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4727   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4728   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4729   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4730   {
4731     OMPIteratorGeneratorScope IteratorScope(
4732         CGF, cast_or_null<OMPIteratorExpr>(
4733                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4734                                    : nullptr));
4735     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4736       const Expr *E = Data.DepExprs[I];
4737       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4738       LValue Base = CGF.EmitLoadOfPointerLValue(
4739           DepobjLVal.getAddress(CGF),
4740           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4741       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4742           Base.getAddress(CGF), KmpDependInfoPtrT);
4743       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4744                                 Base.getTBAAInfo());
4745 
4746       // Get number of elements in a single depobj.
4747       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4748           Addr.getPointer(),
4749           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4750       LValue NumDepsBase = CGF.MakeAddrLValue(
4751           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4752           Base.getBaseInfo(), Base.getTBAAInfo());
4753       // NumDeps = deps[i].base_addr;
4754       LValue BaseAddrLVal = CGF.EmitLValueForField(
4755           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4756       llvm::Value *NumDeps =
4757           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4758 
4759       // memcopy dependency data.
4760       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4761           ElSize,
4762           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4763       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4764       Address DepAddr =
4765           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
4766                   DependenciesArray.getAlignment());
4767       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4768 
4769       // Increase pos.
4770       // pos += size;
4771       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4772       CGF.EmitStoreOfScalar(Add, PosLVal);
4773     }
4774   }
4775 }
4776 
4777 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4778     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4779     SourceLocation Loc) {
4780   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4781         return D.DepExprs.empty();
4782       }))
4783     return std::make_pair(nullptr, Address::invalid());
4784   // Process list of dependencies.
4785   ASTContext &C = CGM.getContext();
4786   Address DependenciesArray = Address::invalid();
4787   llvm::Value *NumOfElements = nullptr;
4788   unsigned NumDependencies = std::accumulate(
4789       Dependencies.begin(), Dependencies.end(), 0,
4790       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4791         return D.DepKind == OMPC_DEPEND_depobj
4792                    ? V
4793                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4794       });
4795   QualType FlagsTy;
4796   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4797   bool HasDepobjDeps = false;
4798   bool HasRegularWithIterators = false;
4799   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4800   llvm::Value *NumOfRegularWithIterators =
4801       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4802   // Calculate number of depobj dependecies and regular deps with the iterators.
4803   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4804     if (D.DepKind == OMPC_DEPEND_depobj) {
4805       SmallVector<llvm::Value *, 4> Sizes =
4806           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4807       for (llvm::Value *Size : Sizes) {
4808         NumOfDepobjElements =
4809             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4810       }
4811       HasDepobjDeps = true;
4812       continue;
4813     }
4814     // Include number of iterations, if any.
4815     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4816       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4817         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4818         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4819         NumOfRegularWithIterators =
4820             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4821       }
4822       HasRegularWithIterators = true;
4823       continue;
4824     }
4825   }
4826 
4827   QualType KmpDependInfoArrayTy;
4828   if (HasDepobjDeps || HasRegularWithIterators) {
4829     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4830                                            /*isSigned=*/false);
4831     if (HasDepobjDeps) {
4832       NumOfElements =
4833           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4834     }
4835     if (HasRegularWithIterators) {
4836       NumOfElements =
4837           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4838     }
4839     OpaqueValueExpr OVE(Loc,
4840                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4841                         VK_RValue);
4842     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4843                                                   RValue::get(NumOfElements));
4844     KmpDependInfoArrayTy =
4845         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4846                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4847     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4848     // Properly emit variable-sized array.
4849     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4850                                          ImplicitParamDecl::Other);
4851     CGF.EmitVarDecl(*PD);
4852     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4853     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4854                                               /*isSigned=*/false);
4855   } else {
4856     KmpDependInfoArrayTy = C.getConstantArrayType(
4857         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4858         ArrayType::Normal, /*IndexTypeQuals=*/0);
4859     DependenciesArray =
4860         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4861     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4862     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4863                                            /*isSigned=*/false);
4864   }
4865   unsigned Pos = 0;
4866   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4867     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4868         Dependencies[I].IteratorExpr)
4869       continue;
4870     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4871                    DependenciesArray);
4872   }
4873   // Copy regular dependecies with iterators.
4874   LValue PosLVal = CGF.MakeAddrLValue(
4875       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4876   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4877   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4878     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4879         !Dependencies[I].IteratorExpr)
4880       continue;
4881     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4882                    DependenciesArray);
4883   }
4884   // Copy final depobj arrays without iterators.
4885   if (HasDepobjDeps) {
4886     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4887       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4888         continue;
4889       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4890                          DependenciesArray);
4891     }
4892   }
4893   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4894       DependenciesArray, CGF.VoidPtrTy);
4895   return std::make_pair(NumOfElements, DependenciesArray);
4896 }
4897 
4898 Address CGOpenMPRuntime::emitDepobjDependClause(
4899     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4900     SourceLocation Loc) {
4901   if (Dependencies.DepExprs.empty())
4902     return Address::invalid();
4903   // Process list of dependencies.
4904   ASTContext &C = CGM.getContext();
4905   Address DependenciesArray = Address::invalid();
4906   unsigned NumDependencies = Dependencies.DepExprs.size();
4907   QualType FlagsTy;
4908   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4909   RecordDecl *KmpDependInfoRD =
4910       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4911 
4912   llvm::Value *Size;
4913   // Define type kmp_depend_info[<Dependencies.size()>];
4914   // For depobj reserve one extra element to store the number of elements.
4915   // It is required to handle depobj(x) update(in) construct.
4916   // kmp_depend_info[<Dependencies.size()>] deps;
4917   llvm::Value *NumDepsVal;
4918   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4919   if (const auto *IE =
4920           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4921     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4922     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4923       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4924       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4925       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4926     }
4927     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4928                                     NumDepsVal);
4929     CharUnits SizeInBytes =
4930         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4931     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4932     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4933     NumDepsVal =
4934         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4935   } else {
4936     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4937         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4938         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4939     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4940     Size = CGM.getSize(Sz.alignTo(Align));
4941     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4942   }
4943   // Need to allocate on the dynamic memory.
4944   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4945   // Use default allocator.
4946   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4947   llvm::Value *Args[] = {ThreadID, Size, Allocator};
4948 
4949   llvm::Value *Addr =
4950       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4951                               CGM.getModule(), OMPRTL___kmpc_alloc),
4952                           Args, ".dep.arr.addr");
4953   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4954       Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
4955   DependenciesArray = Address(Addr, Align);
4956   // Write number of elements in the first element of array for depobj.
4957   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4958   // deps[i].base_addr = NumDependencies;
4959   LValue BaseAddrLVal = CGF.EmitLValueForField(
4960       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4961   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4962   llvm::PointerUnion<unsigned *, LValue *> Pos;
4963   unsigned Idx = 1;
4964   LValue PosLVal;
4965   if (Dependencies.IteratorExpr) {
4966     PosLVal = CGF.MakeAddrLValue(
4967         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4968         C.getSizeType());
4969     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4970                           /*IsInit=*/true);
4971     Pos = &PosLVal;
4972   } else {
4973     Pos = &Idx;
4974   }
4975   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4976   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4977       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
4978   return DependenciesArray;
4979 }
4980 
4981 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4982                                         SourceLocation Loc) {
4983   ASTContext &C = CGM.getContext();
4984   QualType FlagsTy;
4985   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4986   LValue Base = CGF.EmitLoadOfPointerLValue(
4987       DepobjLVal.getAddress(CGF),
4988       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4989   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4990   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4991       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4992   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4993       Addr.getPointer(),
4994       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4995   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4996                                                                CGF.VoidPtrTy);
4997   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4998   // Use default allocator.
4999   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5000   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5001 
5002   // _kmpc_free(gtid, addr, nullptr);
5003   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5004                                 CGM.getModule(), OMPRTL___kmpc_free),
5005                             Args);
5006 }
5007 
5008 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5009                                        OpenMPDependClauseKind NewDepKind,
5010                                        SourceLocation Loc) {
5011   ASTContext &C = CGM.getContext();
5012   QualType FlagsTy;
5013   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5014   RecordDecl *KmpDependInfoRD =
5015       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5016   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5017   llvm::Value *NumDeps;
5018   LValue Base;
5019   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5020 
5021   Address Begin = Base.getAddress(CGF);
5022   // Cast from pointer to array type to pointer to single element.
5023   llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
5024   // The basic structure here is a while-do loop.
5025   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5026   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5027   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5028   CGF.EmitBlock(BodyBB);
5029   llvm::PHINode *ElementPHI =
5030       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5031   ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5032   Begin = Address(ElementPHI, Begin.getAlignment());
5033   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5034                             Base.getTBAAInfo());
5035   // deps[i].flags = NewDepKind;
5036   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5037   LValue FlagsLVal = CGF.EmitLValueForField(
5038       Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5039   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5040                         FlagsLVal);
5041 
5042   // Shift the address forward by one element.
5043   Address ElementNext =
5044       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5045   ElementPHI->addIncoming(ElementNext.getPointer(),
5046                           CGF.Builder.GetInsertBlock());
5047   llvm::Value *IsEmpty =
5048       CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5049   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5050   // Done.
5051   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5052 }
5053 
5054 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5055                                    const OMPExecutableDirective &D,
5056                                    llvm::Function *TaskFunction,
5057                                    QualType SharedsTy, Address Shareds,
5058                                    const Expr *IfCond,
5059                                    const OMPTaskDataTy &Data) {
5060   if (!CGF.HaveInsertPoint())
5061     return;
5062 
5063   TaskResultTy Result =
5064       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5065   llvm::Value *NewTask = Result.NewTask;
5066   llvm::Function *TaskEntry = Result.TaskEntry;
5067   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5068   LValue TDBase = Result.TDBase;
5069   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5070   // Process list of dependences.
5071   Address DependenciesArray = Address::invalid();
5072   llvm::Value *NumOfElements;
5073   std::tie(NumOfElements, DependenciesArray) =
5074       emitDependClause(CGF, Data.Dependences, Loc);
5075 
5076   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5077   // libcall.
5078   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5079   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5080   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5081   // list is not empty
5082   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5083   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5084   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5085   llvm::Value *DepTaskArgs[7];
5086   if (!Data.Dependences.empty()) {
5087     DepTaskArgs[0] = UpLoc;
5088     DepTaskArgs[1] = ThreadID;
5089     DepTaskArgs[2] = NewTask;
5090     DepTaskArgs[3] = NumOfElements;
5091     DepTaskArgs[4] = DependenciesArray.getPointer();
5092     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5093     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5094   }
5095   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5096                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5097     if (!Data.Tied) {
5098       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5099       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5100       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5101     }
5102     if (!Data.Dependences.empty()) {
5103       CGF.EmitRuntimeCall(
5104           OMPBuilder.getOrCreateRuntimeFunction(
5105               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5106           DepTaskArgs);
5107     } else {
5108       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5109                               CGM.getModule(), OMPRTL___kmpc_omp_task),
5110                           TaskArgs);
5111     }
5112     // Check if parent region is untied and build return for untied task;
5113     if (auto *Region =
5114             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5115       Region->emitUntiedSwitch(CGF);
5116   };
5117 
5118   llvm::Value *DepWaitTaskArgs[6];
5119   if (!Data.Dependences.empty()) {
5120     DepWaitTaskArgs[0] = UpLoc;
5121     DepWaitTaskArgs[1] = ThreadID;
5122     DepWaitTaskArgs[2] = NumOfElements;
5123     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5124     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5125     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5126   }
5127   auto &M = CGM.getModule();
5128   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5129                         TaskEntry, &Data, &DepWaitTaskArgs,
5130                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5131     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5132     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5133     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5134     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5135     // is specified.
5136     if (!Data.Dependences.empty())
5137       CGF.EmitRuntimeCall(
5138           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5139           DepWaitTaskArgs);
5140     // Call proxy_task_entry(gtid, new_task);
5141     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5142                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5143       Action.Enter(CGF);
5144       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5145       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5146                                                           OutlinedFnArgs);
5147     };
5148 
5149     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5150     // kmp_task_t *new_task);
5151     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5152     // kmp_task_t *new_task);
5153     RegionCodeGenTy RCG(CodeGen);
5154     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5155                               M, OMPRTL___kmpc_omp_task_begin_if0),
5156                           TaskArgs,
5157                           OMPBuilder.getOrCreateRuntimeFunction(
5158                               M, OMPRTL___kmpc_omp_task_complete_if0),
5159                           TaskArgs);
5160     RCG.setAction(Action);
5161     RCG(CGF);
5162   };
5163 
5164   if (IfCond) {
5165     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5166   } else {
5167     RegionCodeGenTy ThenRCG(ThenCodeGen);
5168     ThenRCG(CGF);
5169   }
5170 }
5171 
5172 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5173                                        const OMPLoopDirective &D,
5174                                        llvm::Function *TaskFunction,
5175                                        QualType SharedsTy, Address Shareds,
5176                                        const Expr *IfCond,
5177                                        const OMPTaskDataTy &Data) {
5178   if (!CGF.HaveInsertPoint())
5179     return;
5180   TaskResultTy Result =
5181       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5182   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5183   // libcall.
5184   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5185   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5186   // sched, kmp_uint64 grainsize, void *task_dup);
5187   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5188   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5189   llvm::Value *IfVal;
5190   if (IfCond) {
5191     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5192                                       /*isSigned=*/true);
5193   } else {
5194     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5195   }
5196 
5197   LValue LBLVal = CGF.EmitLValueForField(
5198       Result.TDBase,
5199       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5200   const auto *LBVar =
5201       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5202   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5203                        LBLVal.getQuals(),
5204                        /*IsInitializer=*/true);
5205   LValue UBLVal = CGF.EmitLValueForField(
5206       Result.TDBase,
5207       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5208   const auto *UBVar =
5209       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5210   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5211                        UBLVal.getQuals(),
5212                        /*IsInitializer=*/true);
5213   LValue StLVal = CGF.EmitLValueForField(
5214       Result.TDBase,
5215       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5216   const auto *StVar =
5217       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5218   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5219                        StLVal.getQuals(),
5220                        /*IsInitializer=*/true);
5221   // Store reductions address.
5222   LValue RedLVal = CGF.EmitLValueForField(
5223       Result.TDBase,
5224       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5225   if (Data.Reductions) {
5226     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5227   } else {
5228     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5229                                CGF.getContext().VoidPtrTy);
5230   }
5231   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5232   llvm::Value *TaskArgs[] = {
5233       UpLoc,
5234       ThreadID,
5235       Result.NewTask,
5236       IfVal,
5237       LBLVal.getPointer(CGF),
5238       UBLVal.getPointer(CGF),
5239       CGF.EmitLoadOfScalar(StLVal, Loc),
5240       llvm::ConstantInt::getSigned(
5241           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5242       llvm::ConstantInt::getSigned(
5243           CGF.IntTy, Data.Schedule.getPointer()
5244                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5245                          : NoSchedule),
5246       Data.Schedule.getPointer()
5247           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5248                                       /*isSigned=*/false)
5249           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5250       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5251                              Result.TaskDupFn, CGF.VoidPtrTy)
5252                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5253   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5254                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5255                       TaskArgs);
5256 }
5257 
5258 /// Emit reduction operation for each element of array (required for
5259 /// array sections) LHS op = RHS.
5260 /// \param Type Type of array.
5261 /// \param LHSVar Variable on the left side of the reduction operation
5262 /// (references element of array in original variable).
5263 /// \param RHSVar Variable on the right side of the reduction operation
5264 /// (references element of array in original variable).
5265 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5266 /// RHSVar.
5267 static void EmitOMPAggregateReduction(
5268     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5269     const VarDecl *RHSVar,
5270     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5271                                   const Expr *, const Expr *)> &RedOpGen,
5272     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5273     const Expr *UpExpr = nullptr) {
5274   // Perform element-by-element initialization.
5275   QualType ElementTy;
5276   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5277   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5278 
5279   // Drill down to the base element type on both arrays.
5280   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5281   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5282 
5283   llvm::Value *RHSBegin = RHSAddr.getPointer();
5284   llvm::Value *LHSBegin = LHSAddr.getPointer();
5285   // Cast from pointer to array type to pointer to single element.
5286   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5287   // The basic structure here is a while-do loop.
5288   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5289   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5290   llvm::Value *IsEmpty =
5291       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5292   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5293 
5294   // Enter the loop body, making that address the current address.
5295   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5296   CGF.EmitBlock(BodyBB);
5297 
5298   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5299 
5300   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5301       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5302   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5303   Address RHSElementCurrent =
5304       Address(RHSElementPHI,
5305               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5306 
5307   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5308       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5309   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5310   Address LHSElementCurrent =
5311       Address(LHSElementPHI,
5312               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5313 
5314   // Emit copy.
5315   CodeGenFunction::OMPPrivateScope Scope(CGF);
5316   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5317   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5318   Scope.Privatize();
5319   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5320   Scope.ForceCleanup();
5321 
5322   // Shift the address forward by one element.
5323   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5324       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5325   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5326       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5327   // Check whether we've reached the end.
5328   llvm::Value *Done =
5329       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5330   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5331   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5332   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5333 
5334   // Done.
5335   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5336 }
5337 
5338 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5339 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5340 /// UDR combiner function.
5341 static void emitReductionCombiner(CodeGenFunction &CGF,
5342                                   const Expr *ReductionOp) {
5343   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5344     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5345       if (const auto *DRE =
5346               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5347         if (const auto *DRD =
5348                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5349           std::pair<llvm::Function *, llvm::Function *> Reduction =
5350               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5351           RValue Func = RValue::get(Reduction.first);
5352           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5353           CGF.EmitIgnoredExpr(ReductionOp);
5354           return;
5355         }
5356   CGF.EmitIgnoredExpr(ReductionOp);
5357 }
5358 
5359 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5360     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5361     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5362     ArrayRef<const Expr *> ReductionOps) {
5363   ASTContext &C = CGM.getContext();
5364 
5365   // void reduction_func(void *LHSArg, void *RHSArg);
5366   FunctionArgList Args;
5367   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5368                            ImplicitParamDecl::Other);
5369   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5370                            ImplicitParamDecl::Other);
5371   Args.push_back(&LHSArg);
5372   Args.push_back(&RHSArg);
5373   const auto &CGFI =
5374       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5375   std::string Name = getName({"omp", "reduction", "reduction_func"});
5376   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5377                                     llvm::GlobalValue::InternalLinkage, Name,
5378                                     &CGM.getModule());
5379   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5380   Fn->setDoesNotRecurse();
5381   CodeGenFunction CGF(CGM);
5382   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5383 
5384   // Dst = (void*[n])(LHSArg);
5385   // Src = (void*[n])(RHSArg);
5386   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5387       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5388       ArgsType), CGF.getPointerAlign());
5389   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5390       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5391       ArgsType), CGF.getPointerAlign());
5392 
5393   //  ...
5394   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5395   //  ...
5396   CodeGenFunction::OMPPrivateScope Scope(CGF);
5397   auto IPriv = Privates.begin();
5398   unsigned Idx = 0;
5399   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5400     const auto *RHSVar =
5401         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5402     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5403       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5404     });
5405     const auto *LHSVar =
5406         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5407     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5408       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5409     });
5410     QualType PrivTy = (*IPriv)->getType();
5411     if (PrivTy->isVariablyModifiedType()) {
5412       // Get array size and emit VLA type.
5413       ++Idx;
5414       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5415       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5416       const VariableArrayType *VLA =
5417           CGF.getContext().getAsVariableArrayType(PrivTy);
5418       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5419       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5420           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5421       CGF.EmitVariablyModifiedType(PrivTy);
5422     }
5423   }
5424   Scope.Privatize();
5425   IPriv = Privates.begin();
5426   auto ILHS = LHSExprs.begin();
5427   auto IRHS = RHSExprs.begin();
5428   for (const Expr *E : ReductionOps) {
5429     if ((*IPriv)->getType()->isArrayType()) {
5430       // Emit reduction for array section.
5431       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5432       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5433       EmitOMPAggregateReduction(
5434           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5435           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5436             emitReductionCombiner(CGF, E);
5437           });
5438     } else {
5439       // Emit reduction for array subscript or single variable.
5440       emitReductionCombiner(CGF, E);
5441     }
5442     ++IPriv;
5443     ++ILHS;
5444     ++IRHS;
5445   }
5446   Scope.ForceCleanup();
5447   CGF.FinishFunction();
5448   return Fn;
5449 }
5450 
5451 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5452                                                   const Expr *ReductionOp,
5453                                                   const Expr *PrivateRef,
5454                                                   const DeclRefExpr *LHS,
5455                                                   const DeclRefExpr *RHS) {
5456   if (PrivateRef->getType()->isArrayType()) {
5457     // Emit reduction for array section.
5458     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5459     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5460     EmitOMPAggregateReduction(
5461         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5462         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5463           emitReductionCombiner(CGF, ReductionOp);
5464         });
5465   } else {
5466     // Emit reduction for array subscript or single variable.
5467     emitReductionCombiner(CGF, ReductionOp);
5468   }
5469 }
5470 
/// Emits the code that combines the reduction helper variables at the end of
/// a region with reduction clauses.
///
/// \param CGF Function being generated; nothing is emitted if it has no insert
/// point.
/// \param Loc Source location passed to the emitted runtime calls.
/// \param Privates Expressions whose types decide whether an item is combined
/// as an array and whether an extra size slot is reserved for it in the
/// reduction list (variably modified types).
/// \param LHSExprs References to the helper variables used as the left-hand
/// side of each reduction operation.
/// \param RHSExprs References to the helper variables used as the right-hand
/// side of each reduction operation; their addresses fill the reduction list.
/// \param ReductionOps Combiner expressions, one per reduction item.
/// \param Options If \c SimpleReduction is set, only the inline combiners are
/// emitted and no runtime function is called. \c WithNowait selects the
/// *_nowait flavors of the runtime entry points.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Emit the combiners inline, inside a cleanups scope, and stop: no
    // reduction list, outlined function or runtime call is needed.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // The list gets one slot per reduction item plus one additional slot for
  // every item of variably modified type.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in the list; it advances faster than I when size slots are
  // interleaved.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The element count is stored in the pointer-typed slot through an
      // inttoptr cast. This local Size shadows the slot count declared above.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  // Note that <n> is the number of reduction items, while sizeof(RedList) also
  // covers the slots reserved for array sizes.
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  // Any result other than 1 or 2 branches straight to the default block.
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Region body for case 1: the plain (non-atomic) combiners for all items.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The action carries the __kmpc_end_reduce{_nowait} callee and arguments.
  // NOTE(review): the first two arguments (nullptr, llvm::None) presumably
  // mean "no enter call" - confirm against CommonActionTy.
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // Decompose the combiner, if it has the form 'x = <update>', into:
      //   XExpr  - the assigned location 'x';
      //   UpExpr - the whole update expression on the right-hand side;
      //   EExpr  - the right operand of the binary operator found in the
      //            update (or in its condition for a conditional operator);
      //   BO     - the opcode of that binary operator.
      // Combiners of any other shape leave XExpr null and are emitted as a
      // critical region below.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // The pointer declared in the condition below shadows the opcode
      // variable BO within this if statement only.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          // The callback passed last evaluates UpExpr with VD remapped to a
          // temporary that holds the value of 'x' it is given.
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  // Only the blocking flavor attaches __kmpc_end_reduce to the atomic case;
  // with nowait the region is emitted without an action.
  // NOTE(review): Action is declared in the same block as the AtomicRCG(CGF)
  // call, presumably because the region only refers to it - confirm before
  // merging the two branches.
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5777 
5778 /// Generates unique name for artificial threadprivate variables.
5779 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5780 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5781                                       const Expr *Ref) {
5782   SmallString<256> Buffer;
5783   llvm::raw_svector_ostream Out(Buffer);
5784   const clang::DeclRefExpr *DE;
5785   const VarDecl *D = ::getBaseDecl(Ref, DE);
5786   if (!D)
5787     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5788   D = D->getCanonicalDecl();
5789   std::string Name = CGM.getOpenMPRuntime().getName(
5790       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5791   Out << Prefix << Name << "_"
5792       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5793   return std::string(Out.str());
5794 }
5795 
5796 /// Emits reduction initializer function:
5797 /// \code
5798 /// void @.red_init(void* %arg, void* %orig) {
5799 /// %0 = bitcast void* %arg to <type>*
5800 /// store <type> <init>, <type>* %0
5801 /// ret void
5802 /// }
5803 /// \endcode
5804 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5805                                            SourceLocation Loc,
5806                                            ReductionCodeGen &RCG, unsigned N) {
5807   ASTContext &C = CGM.getContext();
5808   QualType VoidPtrTy = C.VoidPtrTy;
5809   VoidPtrTy.addRestrict();
5810   FunctionArgList Args;
5811   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5812                           ImplicitParamDecl::Other);
5813   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5814                               ImplicitParamDecl::Other);
5815   Args.emplace_back(&Param);
5816   Args.emplace_back(&ParamOrig);
5817   const auto &FnInfo =
5818       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5819   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5820   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5821   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5822                                     Name, &CGM.getModule());
5823   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5824   Fn->setDoesNotRecurse();
5825   CodeGenFunction CGF(CGM);
5826   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5827   Address PrivateAddr = CGF.EmitLoadOfPointer(
5828       CGF.GetAddrOfLocalVar(&Param),
5829       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5830   llvm::Value *Size = nullptr;
5831   // If the size of the reduction item is non-constant, load it from global
5832   // threadprivate variable.
5833   if (RCG.getSizes(N).second) {
5834     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5835         CGF, CGM.getContext().getSizeType(),
5836         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5837     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5838                                 CGM.getContext().getSizeType(), Loc);
5839   }
5840   RCG.emitAggregateType(CGF, N, Size);
5841   LValue OrigLVal;
5842   // If initializer uses initializer from declare reduction construct, emit a
5843   // pointer to the address of the original reduction item (reuired by reduction
5844   // initializer)
5845   if (RCG.usesReductionInitializer(N)) {
5846     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5847     SharedAddr = CGF.EmitLoadOfPointer(
5848         SharedAddr,
5849         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5850     OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5851   } else {
5852     OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5853         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5854         CGM.getContext().VoidPtrTy);
5855   }
5856   // Emit the initializer:
5857   // %0 = bitcast void* %arg to <type>*
5858   // store <type> <init>, <type>* %0
5859   RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5860                          [](CodeGenFunction &) { return false; });
5861   CGF.FinishFunction();
5862   return Fn;
5863 }
5864 
5865 /// Emits reduction combiner function:
5866 /// \code
5867 /// void @.red_comb(void* %arg0, void* %arg1) {
5868 /// %lhs = bitcast void* %arg0 to <type>*
5869 /// %rhs = bitcast void* %arg1 to <type>*
5870 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5871 /// store <type> %2, <type>* %lhs
5872 /// ret void
5873 /// }
5874 /// \endcode
5875 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5876                                            SourceLocation Loc,
5877                                            ReductionCodeGen &RCG, unsigned N,
5878                                            const Expr *ReductionOp,
5879                                            const Expr *LHS, const Expr *RHS,
5880                                            const Expr *PrivateRef) {
5881   ASTContext &C = CGM.getContext();
5882   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5883   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5884   FunctionArgList Args;
5885   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5886                                C.VoidPtrTy, ImplicitParamDecl::Other);
5887   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5888                             ImplicitParamDecl::Other);
5889   Args.emplace_back(&ParamInOut);
5890   Args.emplace_back(&ParamIn);
5891   const auto &FnInfo =
5892       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5893   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5894   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5895   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5896                                     Name, &CGM.getModule());
5897   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5898   Fn->setDoesNotRecurse();
5899   CodeGenFunction CGF(CGM);
5900   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5901   llvm::Value *Size = nullptr;
5902   // If the size of the reduction item is non-constant, load it from global
5903   // threadprivate variable.
5904   if (RCG.getSizes(N).second) {
5905     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5906         CGF, CGM.getContext().getSizeType(),
5907         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5908     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5909                                 CGM.getContext().getSizeType(), Loc);
5910   }
5911   RCG.emitAggregateType(CGF, N, Size);
5912   // Remap lhs and rhs variables to the addresses of the function arguments.
5913   // %lhs = bitcast void* %arg0 to <type>*
5914   // %rhs = bitcast void* %arg1 to <type>*
5915   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5916   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
5917     // Pull out the pointer to the variable.
5918     Address PtrAddr = CGF.EmitLoadOfPointer(
5919         CGF.GetAddrOfLocalVar(&ParamInOut),
5920         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5921     return CGF.Builder.CreateElementBitCast(
5922         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5923   });
5924   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
5925     // Pull out the pointer to the variable.
5926     Address PtrAddr = CGF.EmitLoadOfPointer(
5927         CGF.GetAddrOfLocalVar(&ParamIn),
5928         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5929     return CGF.Builder.CreateElementBitCast(
5930         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
5931   });
5932   PrivateScope.Privatize();
5933   // Emit the combiner body:
5934   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5935   // store <type> %2, <type>* %lhs
5936   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5937       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5938       cast<DeclRefExpr>(RHS));
5939   CGF.FinishFunction();
5940   return Fn;
5941 }
5942 
5943 /// Emits reduction finalizer function:
5944 /// \code
5945 /// void @.red_fini(void* %arg) {
5946 /// %0 = bitcast void* %arg to <type>*
5947 /// <destroy>(<type>* %0)
5948 /// ret void
5949 /// }
5950 /// \endcode
5951 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5952                                            SourceLocation Loc,
5953                                            ReductionCodeGen &RCG, unsigned N) {
5954   if (!RCG.needCleanups(N))
5955     return nullptr;
5956   ASTContext &C = CGM.getContext();
5957   FunctionArgList Args;
5958   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5959                           ImplicitParamDecl::Other);
5960   Args.emplace_back(&Param);
5961   const auto &FnInfo =
5962       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5963   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5964   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5965   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5966                                     Name, &CGM.getModule());
5967   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5968   Fn->setDoesNotRecurse();
5969   CodeGenFunction CGF(CGM);
5970   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5971   Address PrivateAddr = CGF.EmitLoadOfPointer(
5972       CGF.GetAddrOfLocalVar(&Param),
5973       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5974   llvm::Value *Size = nullptr;
5975   // If the size of the reduction item is non-constant, load it from global
5976   // threadprivate variable.
5977   if (RCG.getSizes(N).second) {
5978     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5979         CGF, CGM.getContext().getSizeType(),
5980         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5981     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5982                                 CGM.getContext().getSizeType(), Loc);
5983   }
5984   RCG.emitAggregateType(CGF, N, Size);
5985   // Emit the finalizer body:
5986   // <destroy>(<type>* %0)
5987   RCG.emitCleanups(CGF, N, PrivateAddr);
5988   CGF.FinishFunction(Loc);
5989   return Fn;
5990 }
5991 
5992 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5993     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5994     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5995   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5996     return nullptr;
5997 
5998   // Build typedef struct:
5999   // kmp_taskred_input {
6000   //   void *reduce_shar; // shared reduction item
6001   //   void *reduce_orig; // original reduction item used for initialization
6002   //   size_t reduce_size; // size of data item
6003   //   void *reduce_init; // data initialization routine
6004   //   void *reduce_fini; // data finalization routine
6005   //   void *reduce_comb; // data combiner routine
6006   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
6007   // } kmp_taskred_input_t;
6008   ASTContext &C = CGM.getContext();
6009   RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
6010   RD->startDefinition();
6011   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6012   const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6013   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6014   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6015   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6016   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6017   const FieldDecl *FlagsFD = addFieldToRecordDecl(
6018       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6019   RD->completeDefinition();
6020   QualType RDType = C.getRecordType(RD);
6021   unsigned Size = Data.ReductionVars.size();
6022   llvm::APInt ArraySize(/*numBits=*/64, Size);
6023   QualType ArrayRDType = C.getConstantArrayType(
6024       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
6025   // kmp_task_red_input_t .rd_input.[Size];
6026   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6027   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
6028                        Data.ReductionCopies, Data.ReductionOps);
6029   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6030     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6031     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6032                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6033     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6034         TaskRedInput.getPointer(), Idxs,
6035         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6036         ".rd_input.gep.");
6037     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6038     // ElemLVal.reduce_shar = &Shareds[Cnt];
6039     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6040     RCG.emitSharedOrigLValue(CGF, Cnt);
6041     llvm::Value *CastedShared =
6042         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
6043     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6044     // ElemLVal.reduce_orig = &Origs[Cnt];
6045     LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
6046     llvm::Value *CastedOrig =
6047         CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6048     CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6049     RCG.emitAggregateType(CGF, Cnt);
6050     llvm::Value *SizeValInChars;
6051     llvm::Value *SizeVal;
6052     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6053     // We use delayed creation/initialization for VLAs and array sections. It is
6054     // required because runtime does not provide the way to pass the sizes of
6055     // VLAs/array sections to initializer/combiner/finalizer functions. Instead
6056     // threadprivate global variables are used to store these values and use
6057     // them in the functions.
6058     bool DelayedCreation = !!SizeVal;
6059     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6060                                                /*isSigned=*/false);
6061     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6062     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6063     // ElemLVal.reduce_init = init;
6064     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6065     llvm::Value *InitAddr =
6066         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6067     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6068     // ElemLVal.reduce_fini = fini;
6069     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6070     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6071     llvm::Value *FiniAddr = Fini
6072                                 ? CGF.EmitCastToVoidPtr(Fini)
6073                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6074     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6075     // ElemLVal.reduce_comb = comb;
6076     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6077     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6078         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6079         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6080     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6081     // ElemLVal.flags = 0;
6082     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6083     if (DelayedCreation) {
6084       CGF.EmitStoreOfScalar(
6085           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6086           FlagsLVal);
6087     } else
6088       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6089                                  FlagsLVal.getType());
6090   }
6091   if (Data.IsReductionWithTaskMod) {
6092     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6093     // is_ws, int num, void *data);
6094     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6095     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6096                                                   CGM.IntTy, /*isSigned=*/true);
6097     llvm::Value *Args[] = {
6098         IdentTLoc, GTid,
6099         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
6100                                /*isSigned=*/true),
6101         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6102         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6103             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6104     return CGF.EmitRuntimeCall(
6105         OMPBuilder.getOrCreateRuntimeFunction(
6106             CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6107         Args);
6108   }
6109   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6110   llvm::Value *Args[] = {
6111       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6112                                 /*isSigned=*/true),
6113       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6114       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6115                                                       CGM.VoidPtrTy)};
6116   return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6117                                  CGM.getModule(), OMPRTL___kmpc_taskred_init),
6118                              Args);
6119 }
6120 
6121 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6122                                             SourceLocation Loc,
6123                                             bool IsWorksharingReduction) {
6124   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6125   // is_ws, int num, void *data);
6126   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6127   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6128                                                 CGM.IntTy, /*isSigned=*/true);
6129   llvm::Value *Args[] = {IdentTLoc, GTid,
6130                          llvm::ConstantInt::get(CGM.IntTy,
6131                                                 IsWorksharingReduction ? 1 : 0,
6132                                                 /*isSigned=*/true)};
6133   (void)CGF.EmitRuntimeCall(
6134       OMPBuilder.getOrCreateRuntimeFunction(
6135           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6136       Args);
6137 }
6138 
6139 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6140                                               SourceLocation Loc,
6141                                               ReductionCodeGen &RCG,
6142                                               unsigned N) {
6143   auto Sizes = RCG.getSizes(N);
6144   // Emit threadprivate global variable if the type is non-constant
6145   // (Sizes.second = nullptr).
6146   if (Sizes.second) {
6147     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6148                                                      /*isSigned=*/false);
6149     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6150         CGF, CGM.getContext().getSizeType(),
6151         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6152     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6153   }
6154 }
6155 
6156 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6157                                               SourceLocation Loc,
6158                                               llvm::Value *ReductionsPtr,
6159                                               LValue SharedLVal) {
6160   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6161   // *d);
6162   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6163                                                    CGM.IntTy,
6164                                                    /*isSigned=*/true),
6165                          ReductionsPtr,
6166                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6167                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6168   return Address(
6169       CGF.EmitRuntimeCall(
6170           OMPBuilder.getOrCreateRuntimeFunction(
6171               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6172           Args),
6173       SharedLVal.getAlignment());
6174 }
6175 
6176 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6177                                        SourceLocation Loc) {
6178   if (!CGF.HaveInsertPoint())
6179     return;
6180 
6181   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6182     OMPBuilder.CreateTaskwait(CGF.Builder);
6183   } else {
6184     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6185     // global_tid);
6186     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6187     // Ignore return result until untied tasks are supported.
6188     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6189                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6190                         Args);
6191   }
6192 
6193   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6194     Region->emitUntiedSwitch(CGF);
6195 }
6196 
6197 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6198                                            OpenMPDirectiveKind InnerKind,
6199                                            const RegionCodeGenTy &CodeGen,
6200                                            bool HasCancel) {
6201   if (!CGF.HaveInsertPoint())
6202     return;
6203   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6204   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6205 }
6206 
6207 namespace {
6208 enum RTCancelKind {
6209   CancelNoreq = 0,
6210   CancelParallel = 1,
6211   CancelLoop = 2,
6212   CancelSections = 3,
6213   CancelTaskgroup = 4
6214 };
6215 } // anonymous namespace
6216 
6217 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6218   RTCancelKind CancelKind = CancelNoreq;
6219   if (CancelRegion == OMPD_parallel)
6220     CancelKind = CancelParallel;
6221   else if (CancelRegion == OMPD_for)
6222     CancelKind = CancelLoop;
6223   else if (CancelRegion == OMPD_sections)
6224     CancelKind = CancelSections;
6225   else {
6226     assert(CancelRegion == OMPD_taskgroup);
6227     CancelKind = CancelTaskgroup;
6228   }
6229   return CancelKind;
6230 }
6231 
6232 void CGOpenMPRuntime::emitCancellationPointCall(
6233     CodeGenFunction &CGF, SourceLocation Loc,
6234     OpenMPDirectiveKind CancelRegion) {
6235   if (!CGF.HaveInsertPoint())
6236     return;
6237   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6238   // global_tid, kmp_int32 cncl_kind);
6239   if (auto *OMPRegionInfo =
6240           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6241     // For 'cancellation point taskgroup', the task region info may not have a
6242     // cancel. This may instead happen in another adjacent task.
6243     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6244       llvm::Value *Args[] = {
6245           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6246           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6247       // Ignore return result until untied tasks are supported.
6248       llvm::Value *Result = CGF.EmitRuntimeCall(
6249           OMPBuilder.getOrCreateRuntimeFunction(
6250               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6251           Args);
6252       // if (__kmpc_cancellationpoint()) {
6253       //   exit from construct;
6254       // }
6255       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6256       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6257       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6258       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6259       CGF.EmitBlock(ExitBB);
6260       // exit from construct;
6261       CodeGenFunction::JumpDest CancelDest =
6262           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6263       CGF.EmitBranchThroughCleanup(CancelDest);
6264       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6265     }
6266   }
6267 }
6268 
6269 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6270                                      const Expr *IfCond,
6271                                      OpenMPDirectiveKind CancelRegion) {
6272   if (!CGF.HaveInsertPoint())
6273     return;
6274   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6275   // kmp_int32 cncl_kind);
6276   auto &M = CGM.getModule();
6277   if (auto *OMPRegionInfo =
6278           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6279     auto &&ThenGen = [this, &M, Loc, CancelRegion,
6280                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6281       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6282       llvm::Value *Args[] = {
6283           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6284           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6285       // Ignore return result until untied tasks are supported.
6286       llvm::Value *Result = CGF.EmitRuntimeCall(
6287           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6288       // if (__kmpc_cancel()) {
6289       //   exit from construct;
6290       // }
6291       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6292       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6293       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6294       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6295       CGF.EmitBlock(ExitBB);
6296       // exit from construct;
6297       CodeGenFunction::JumpDest CancelDest =
6298           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6299       CGF.EmitBranchThroughCleanup(CancelDest);
6300       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6301     };
6302     if (IfCond) {
6303       emitIfClause(CGF, IfCond, ThenGen,
6304                    [](CodeGenFunction &, PrePostActionTy &) {});
6305     } else {
6306       RegionCodeGenTy ThenRCG(ThenGen);
6307       ThenRCG(CGF);
6308     }
6309   }
6310 }
6311 
6312 namespace {
6313 /// Cleanup action for uses_allocators support.
6314 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6315   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6316 
6317 public:
6318   OMPUsesAllocatorsActionTy(
6319       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6320       : Allocators(Allocators) {}
6321   void Enter(CodeGenFunction &CGF) override {
6322     if (!CGF.HaveInsertPoint())
6323       return;
6324     for (const auto &AllocatorData : Allocators) {
6325       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6326           CGF, AllocatorData.first, AllocatorData.second);
6327     }
6328   }
6329   void Exit(CodeGenFunction &CGF) override {
6330     if (!CGF.HaveInsertPoint())
6331       return;
6332     for (const auto &AllocatorData : Allocators) {
6333       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6334                                                         AllocatorData.first);
6335     }
6336   }
6337 };
6338 } // namespace
6339 
6340 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6341     const OMPExecutableDirective &D, StringRef ParentName,
6342     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6343     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6344   assert(!ParentName.empty() && "Invalid target region parent name!");
6345   HasEmittedTargetRegion = true;
6346   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6347   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6348     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6349       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6350       if (!D.AllocatorTraits)
6351         continue;
6352       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6353     }
6354   }
6355   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6356   CodeGen.setAction(UsesAllocatorAction);
6357   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6358                                    IsOffloadEntry, CodeGen);
6359 }
6360 
6361 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6362                                              const Expr *Allocator,
6363                                              const Expr *AllocatorTraits) {
6364   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6365   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6366   // Use default memspace handle.
6367   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6368   llvm::Value *NumTraits = llvm::ConstantInt::get(
6369       CGF.IntTy, cast<ConstantArrayType>(
6370                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6371                      ->getSize()
6372                      .getLimitedValue());
6373   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6374   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6375       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
6376   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6377                                            AllocatorTraitsLVal.getBaseInfo(),
6378                                            AllocatorTraitsLVal.getTBAAInfo());
6379   llvm::Value *Traits =
6380       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6381 
6382   llvm::Value *AllocatorVal =
6383       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6384                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6385                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6386   // Store to allocator.
6387   CGF.EmitVarDecl(*cast<VarDecl>(
6388       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6389   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6390   AllocatorVal =
6391       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6392                                Allocator->getType(), Allocator->getExprLoc());
6393   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6394 }
6395 
6396 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6397                                              const Expr *Allocator) {
6398   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6399   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6400   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6401   llvm::Value *AllocatorVal =
6402       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6403   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6404                                           CGF.getContext().VoidPtrTy,
6405                                           Allocator->getExprLoc());
6406   (void)CGF.EmitRuntimeCall(
6407       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6408                                             OMPRTL___kmpc_destroy_allocator),
6409       {ThreadId, AllocatorVal});
6410 }
6411 
6412 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6413     const OMPExecutableDirective &D, StringRef ParentName,
6414     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6415     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6416   // Create a unique name for the entry function using the source location
6417   // information of the current target region. The name will be something like:
6418   //
6419   // __omp_offloading_DD_FFFF_PP_lBB
6420   //
6421   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6422   // mangled name of the function that encloses the target region and BB is the
6423   // line number of the target region.
6424 
6425   unsigned DeviceID;
6426   unsigned FileID;
6427   unsigned Line;
6428   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6429                            Line);
6430   SmallString<64> EntryFnName;
6431   {
6432     llvm::raw_svector_ostream OS(EntryFnName);
6433     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6434        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6435   }
6436 
6437   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6438 
6439   CodeGenFunction CGF(CGM, true);
6440   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6441   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6442 
6443   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6444 
6445   // If this target outline function is not an offload entry, we don't need to
6446   // register it.
6447   if (!IsOffloadEntry)
6448     return;
6449 
6450   // The target region ID is used by the runtime library to identify the current
6451   // target region, so it only has to be unique and not necessarily point to
6452   // anything. It could be the pointer to the outlined function that implements
6453   // the target region, but we aren't using that so that the compiler doesn't
6454   // need to keep that, and could therefore inline the host function if proven
6455   // worthwhile during optimization. In the other hand, if emitting code for the
6456   // device, the ID has to be the function address so that it can retrieved from
6457   // the offloading entry and launched by the runtime library. We also mark the
6458   // outlined function to have external linkage in case we are emitting code for
6459   // the device, because these functions will be entry points to the device.
6460 
6461   if (CGM.getLangOpts().OpenMPIsDevice) {
6462     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6463     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6464     OutlinedFn->setDSOLocal(false);
6465   } else {
6466     std::string Name = getName({EntryFnName, "region_id"});
6467     OutlinedFnID = new llvm::GlobalVariable(
6468         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6469         llvm::GlobalValue::WeakAnyLinkage,
6470         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6471   }
6472 
6473   // Register the information for the entry associated with this target region.
6474   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6475       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6476       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6477 }
6478 
6479 /// Checks if the expression is constant or does not have non-trivial function
6480 /// calls.
6481 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6482   // We can skip constant expressions.
6483   // We can skip expressions with trivial calls or simple expressions.
6484   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6485           !E->hasNonTrivialCall(Ctx)) &&
6486          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6487 }
6488 
6489 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6490                                                     const Stmt *Body) {
6491   const Stmt *Child = Body->IgnoreContainers();
6492   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6493     Child = nullptr;
6494     for (const Stmt *S : C->body()) {
6495       if (const auto *E = dyn_cast<Expr>(S)) {
6496         if (isTrivial(Ctx, E))
6497           continue;
6498       }
6499       // Some of the statements can be ignored.
6500       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6501           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6502         continue;
6503       // Analyze declarations.
6504       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6505         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6506               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6507                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6508                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6509                   isa<UsingDirectiveDecl>(D) ||
6510                   isa<OMPDeclareReductionDecl>(D) ||
6511                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6512                 return true;
6513               const auto *VD = dyn_cast<VarDecl>(D);
6514               if (!VD)
6515                 return false;
6516               return VD->isConstexpr() ||
6517                      ((VD->getType().isTrivialType(Ctx) ||
6518                        VD->getType()->isReferenceType()) &&
6519                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6520             }))
6521           continue;
6522       }
6523       // Found multiple children - cannot get the one child only.
6524       if (Child)
6525         return nullptr;
6526       Child = S;
6527     }
6528     if (Child)
6529       Child = Child->IgnoreContainers();
6530   }
6531   return Child;
6532 }
6533 
6534 /// Emit the number of teams for a target directive.  Inspect the num_teams
6535 /// clause associated with a teams construct combined or closely nested
6536 /// with the target directive.
6537 ///
6538 /// Emit a team of size one for directives such as 'target parallel' that
6539 /// have no associated teams construct.
6540 ///
6541 /// Otherwise, return nullptr.
6542 static llvm::Value *
6543 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6544                                const OMPExecutableDirective &D) {
6545   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6546          "Clauses associated with the teams directive expected to be emitted "
6547          "only for the host!");
6548   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6549   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6550          "Expected target-based executable directive.");
6551   CGBuilderTy &Bld = CGF.Builder;
6552   switch (DirectiveKind) {
6553   case OMPD_target: {
6554     const auto *CS = D.getInnermostCapturedStmt();
6555     const auto *Body =
6556         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6557     const Stmt *ChildStmt =
6558         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6559     if (const auto *NestedDir =
6560             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6561       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6562         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6563           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6564           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6565           const Expr *NumTeams =
6566               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6567           llvm::Value *NumTeamsVal =
6568               CGF.EmitScalarExpr(NumTeams,
6569                                  /*IgnoreResultAssign*/ true);
6570           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6571                                    /*isSigned=*/true);
6572         }
6573         return Bld.getInt32(0);
6574       }
6575       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6576           isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6577         return Bld.getInt32(1);
6578       return Bld.getInt32(0);
6579     }
6580     return nullptr;
6581   }
6582   case OMPD_target_teams:
6583   case OMPD_target_teams_distribute:
6584   case OMPD_target_teams_distribute_simd:
6585   case OMPD_target_teams_distribute_parallel_for:
6586   case OMPD_target_teams_distribute_parallel_for_simd: {
6587     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6588       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6589       const Expr *NumTeams =
6590           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6591       llvm::Value *NumTeamsVal =
6592           CGF.EmitScalarExpr(NumTeams,
6593                              /*IgnoreResultAssign*/ true);
6594       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6595                                /*isSigned=*/true);
6596     }
6597     return Bld.getInt32(0);
6598   }
6599   case OMPD_target_parallel:
6600   case OMPD_target_parallel_for:
6601   case OMPD_target_parallel_for_simd:
6602   case OMPD_target_simd:
6603     return Bld.getInt32(1);
6604   case OMPD_parallel:
6605   case OMPD_for:
6606   case OMPD_parallel_for:
6607   case OMPD_parallel_master:
6608   case OMPD_parallel_sections:
6609   case OMPD_for_simd:
6610   case OMPD_parallel_for_simd:
6611   case OMPD_cancel:
6612   case OMPD_cancellation_point:
6613   case OMPD_ordered:
6614   case OMPD_threadprivate:
6615   case OMPD_allocate:
6616   case OMPD_task:
6617   case OMPD_simd:
6618   case OMPD_sections:
6619   case OMPD_section:
6620   case OMPD_single:
6621   case OMPD_master:
6622   case OMPD_critical:
6623   case OMPD_taskyield:
6624   case OMPD_barrier:
6625   case OMPD_taskwait:
6626   case OMPD_taskgroup:
6627   case OMPD_atomic:
6628   case OMPD_flush:
6629   case OMPD_depobj:
6630   case OMPD_scan:
6631   case OMPD_teams:
6632   case OMPD_target_data:
6633   case OMPD_target_exit_data:
6634   case OMPD_target_enter_data:
6635   case OMPD_distribute:
6636   case OMPD_distribute_simd:
6637   case OMPD_distribute_parallel_for:
6638   case OMPD_distribute_parallel_for_simd:
6639   case OMPD_teams_distribute:
6640   case OMPD_teams_distribute_simd:
6641   case OMPD_teams_distribute_parallel_for:
6642   case OMPD_teams_distribute_parallel_for_simd:
6643   case OMPD_target_update:
6644   case OMPD_declare_simd:
6645   case OMPD_declare_variant:
6646   case OMPD_begin_declare_variant:
6647   case OMPD_end_declare_variant:
6648   case OMPD_declare_target:
6649   case OMPD_end_declare_target:
6650   case OMPD_declare_reduction:
6651   case OMPD_declare_mapper:
6652   case OMPD_taskloop:
6653   case OMPD_taskloop_simd:
6654   case OMPD_master_taskloop:
6655   case OMPD_master_taskloop_simd:
6656   case OMPD_parallel_master_taskloop:
6657   case OMPD_parallel_master_taskloop_simd:
6658   case OMPD_requires:
6659   case OMPD_unknown:
6660     break;
6661   default:
6662     break;
6663   }
6664   llvm_unreachable("Unexpected directive kind.");
6665 }
6666 
6667 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6668                                   llvm::Value *DefaultThreadLimitVal) {
6669   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6670       CGF.getContext(), CS->getCapturedStmt());
6671   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6672     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6673       llvm::Value *NumThreads = nullptr;
6674       llvm::Value *CondVal = nullptr;
6675       // Handle if clause. If if clause present, the number of threads is
6676       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6677       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6678         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6679         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6680         const OMPIfClause *IfClause = nullptr;
6681         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6682           if (C->getNameModifier() == OMPD_unknown ||
6683               C->getNameModifier() == OMPD_parallel) {
6684             IfClause = C;
6685             break;
6686           }
6687         }
6688         if (IfClause) {
6689           const Expr *Cond = IfClause->getCondition();
6690           bool Result;
6691           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6692             if (!Result)
6693               return CGF.Builder.getInt32(1);
6694           } else {
6695             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6696             if (const auto *PreInit =
6697                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6698               for (const auto *I : PreInit->decls()) {
6699                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6700                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6701                 } else {
6702                   CodeGenFunction::AutoVarEmission Emission =
6703                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6704                   CGF.EmitAutoVarCleanups(Emission);
6705                 }
6706               }
6707             }
6708             CondVal = CGF.EvaluateExprAsBool(Cond);
6709           }
6710         }
6711       }
6712       // Check the value of num_threads clause iff if clause was not specified
6713       // or is not evaluated to false.
6714       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6715         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6716         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6717         const auto *NumThreadsClause =
6718             Dir->getSingleClause<OMPNumThreadsClause>();
6719         CodeGenFunction::LexicalScope Scope(
6720             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6721         if (const auto *PreInit =
6722                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6723           for (const auto *I : PreInit->decls()) {
6724             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6725               CGF.EmitVarDecl(cast<VarDecl>(*I));
6726             } else {
6727               CodeGenFunction::AutoVarEmission Emission =
6728                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6729               CGF.EmitAutoVarCleanups(Emission);
6730             }
6731           }
6732         }
6733         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6734         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6735                                                /*isSigned=*/false);
6736         if (DefaultThreadLimitVal)
6737           NumThreads = CGF.Builder.CreateSelect(
6738               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6739               DefaultThreadLimitVal, NumThreads);
6740       } else {
6741         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6742                                            : CGF.Builder.getInt32(0);
6743       }
6744       // Process condition of the if clause.
6745       if (CondVal) {
6746         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6747                                               CGF.Builder.getInt32(1));
6748       }
6749       return NumThreads;
6750     }
6751     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6752       return CGF.Builder.getInt32(1);
6753     return DefaultThreadLimitVal;
6754   }
6755   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6756                                : CGF.Builder.getInt32(0);
6757 }
6758 
6759 /// Emit the number of threads for a target directive.  Inspect the
6760 /// thread_limit clause associated with a teams construct combined or closely
6761 /// nested with the target directive.
6762 ///
6763 /// Emit the num_threads clause for directives such as 'target parallel' that
6764 /// have no associated teams construct.
6765 ///
6766 /// Otherwise, return nullptr.
6767 static llvm::Value *
6768 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
6769                                  const OMPExecutableDirective &D) {
6770   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6771          "Clauses associated with the teams directive expected to be emitted "
6772          "only for the host!");
6773   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6774   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6775          "Expected target-based executable directive.");
6776   CGBuilderTy &Bld = CGF.Builder;
6777   llvm::Value *ThreadLimitVal = nullptr;
6778   llvm::Value *NumThreadsVal = nullptr;
6779   switch (DirectiveKind) {
6780   case OMPD_target: {
6781     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6782     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6783       return NumThreads;
6784     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6785         CGF.getContext(), CS->getCapturedStmt());
6786     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6787       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6788         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6789         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6790         const auto *ThreadLimitClause =
6791             Dir->getSingleClause<OMPThreadLimitClause>();
6792         CodeGenFunction::LexicalScope Scope(
6793             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6794         if (const auto *PreInit =
6795                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6796           for (const auto *I : PreInit->decls()) {
6797             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6798               CGF.EmitVarDecl(cast<VarDecl>(*I));
6799             } else {
6800               CodeGenFunction::AutoVarEmission Emission =
6801                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6802               CGF.EmitAutoVarCleanups(Emission);
6803             }
6804           }
6805         }
6806         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6807             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6808         ThreadLimitVal =
6809             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6810       }
6811       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6812           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6813         CS = Dir->getInnermostCapturedStmt();
6814         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6815             CGF.getContext(), CS->getCapturedStmt());
6816         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6817       }
6818       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6819           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6820         CS = Dir->getInnermostCapturedStmt();
6821         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6822           return NumThreads;
6823       }
6824       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6825         return Bld.getInt32(1);
6826     }
6827     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6828   }
6829   case OMPD_target_teams: {
6830     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6831       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6832       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6833       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6834           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6835       ThreadLimitVal =
6836           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6837     }
6838     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6839     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6840       return NumThreads;
6841     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6842         CGF.getContext(), CS->getCapturedStmt());
6843     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6844       if (Dir->getDirectiveKind() == OMPD_distribute) {
6845         CS = Dir->getInnermostCapturedStmt();
6846         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6847           return NumThreads;
6848       }
6849     }
6850     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6851   }
6852   case OMPD_target_teams_distribute:
6853     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6854       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6855       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6856       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6857           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6858       ThreadLimitVal =
6859           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6860     }
6861     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6862   case OMPD_target_parallel:
6863   case OMPD_target_parallel_for:
6864   case OMPD_target_parallel_for_simd:
6865   case OMPD_target_teams_distribute_parallel_for:
6866   case OMPD_target_teams_distribute_parallel_for_simd: {
6867     llvm::Value *CondVal = nullptr;
6868     // Handle if clause. If if clause present, the number of threads is
6869     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6870     if (D.hasClausesOfKind<OMPIfClause>()) {
6871       const OMPIfClause *IfClause = nullptr;
6872       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6873         if (C->getNameModifier() == OMPD_unknown ||
6874             C->getNameModifier() == OMPD_parallel) {
6875           IfClause = C;
6876           break;
6877         }
6878       }
6879       if (IfClause) {
6880         const Expr *Cond = IfClause->getCondition();
6881         bool Result;
6882         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6883           if (!Result)
6884             return Bld.getInt32(1);
6885         } else {
6886           CodeGenFunction::RunCleanupsScope Scope(CGF);
6887           CondVal = CGF.EvaluateExprAsBool(Cond);
6888         }
6889       }
6890     }
6891     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6892       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6893       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6894       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6895           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6896       ThreadLimitVal =
6897           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6898     }
6899     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6900       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6901       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6902       llvm::Value *NumThreads = CGF.EmitScalarExpr(
6903           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6904       NumThreadsVal =
6905           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
6906       ThreadLimitVal = ThreadLimitVal
6907                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6908                                                                 ThreadLimitVal),
6909                                               NumThreadsVal, ThreadLimitVal)
6910                            : NumThreadsVal;
6911     }
6912     if (!ThreadLimitVal)
6913       ThreadLimitVal = Bld.getInt32(0);
6914     if (CondVal)
6915       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6916     return ThreadLimitVal;
6917   }
6918   case OMPD_target_teams_distribute_simd:
6919   case OMPD_target_simd:
6920     return Bld.getInt32(1);
6921   case OMPD_parallel:
6922   case OMPD_for:
6923   case OMPD_parallel_for:
6924   case OMPD_parallel_master:
6925   case OMPD_parallel_sections:
6926   case OMPD_for_simd:
6927   case OMPD_parallel_for_simd:
6928   case OMPD_cancel:
6929   case OMPD_cancellation_point:
6930   case OMPD_ordered:
6931   case OMPD_threadprivate:
6932   case OMPD_allocate:
6933   case OMPD_task:
6934   case OMPD_simd:
6935   case OMPD_sections:
6936   case OMPD_section:
6937   case OMPD_single:
6938   case OMPD_master:
6939   case OMPD_critical:
6940   case OMPD_taskyield:
6941   case OMPD_barrier:
6942   case OMPD_taskwait:
6943   case OMPD_taskgroup:
6944   case OMPD_atomic:
6945   case OMPD_flush:
6946   case OMPD_depobj:
6947   case OMPD_scan:
6948   case OMPD_teams:
6949   case OMPD_target_data:
6950   case OMPD_target_exit_data:
6951   case OMPD_target_enter_data:
6952   case OMPD_distribute:
6953   case OMPD_distribute_simd:
6954   case OMPD_distribute_parallel_for:
6955   case OMPD_distribute_parallel_for_simd:
6956   case OMPD_teams_distribute:
6957   case OMPD_teams_distribute_simd:
6958   case OMPD_teams_distribute_parallel_for:
6959   case OMPD_teams_distribute_parallel_for_simd:
6960   case OMPD_target_update:
6961   case OMPD_declare_simd:
6962   case OMPD_declare_variant:
6963   case OMPD_begin_declare_variant:
6964   case OMPD_end_declare_variant:
6965   case OMPD_declare_target:
6966   case OMPD_end_declare_target:
6967   case OMPD_declare_reduction:
6968   case OMPD_declare_mapper:
6969   case OMPD_taskloop:
6970   case OMPD_taskloop_simd:
6971   case OMPD_master_taskloop:
6972   case OMPD_master_taskloop_simd:
6973   case OMPD_parallel_master_taskloop:
6974   case OMPD_parallel_master_taskloop_simd:
6975   case OMPD_requires:
6976   case OMPD_unknown:
6977     break;
6978   default:
6979     break;
6980   }
6981   llvm_unreachable("Unsupported directive kind.");
6982 }
6983 
6984 namespace {
6985 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6986 
6987 // Utility to handle information from clauses associated with a given
6988 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6989 // It provides a convenient interface to obtain the information and generate
6990 // code for that information.
6991 class MappableExprsHandler {
6992 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  /// NOTE(review): presumably these values have to stay in sync with the
  /// ones the offloading runtime library expects -- confirm before changing.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    // Note: the value 0x800 is intentionally left unused here; it is reserved
    // for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7036 
7037   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7038   static unsigned getFlagMemberOffset() {
7039     unsigned Offset = 0;
7040     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7041          Remain = Remain >> 1)
7042       Offset++;
7043     return Offset;
7044   }
7045 
7046   /// Class that associates information with a base pointer to be passed to the
7047   /// runtime library.
7048   class BasePointerInfo {
7049     /// The base pointer.
7050     llvm::Value *Ptr = nullptr;
7051     /// The base declaration that refers to this device pointer, or null if
7052     /// there is none.
7053     const ValueDecl *DevPtrDecl = nullptr;
7054 
7055   public:
7056     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7057         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7058     llvm::Value *operator*() const { return Ptr; }
7059     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7060     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7061   };
7062 
  /// Array of base pointers, each with its optional device pointer
  /// declaration.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// Array of LLVM values; MapCombinedInfoTy uses it for both its Pointers
  /// and its Sizes.
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// Array of mapping-type flag words.
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  /// Array of declarations naming user-defined mappers.
  /// NOTE(review): entries are presumably null when no mapper applies --
  /// confirm against the code that fills this array.
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
7067 
7068   /// This structure contains combined information generated for mappable
7069   /// clauses, including base pointers, pointers, sizes, map types, and
7070   /// user-defined mappers.
7071   struct MapCombinedInfoTy {
7072     MapBaseValuesArrayTy BasePointers;
7073     MapValuesArrayTy Pointers;
7074     MapValuesArrayTy Sizes;
7075     MapFlagsArrayTy Types;
7076     MapMappersArrayTy Mappers;
7077 
7078     /// Append arrays in \a CurInfo.
7079     void append(MapCombinedInfoTy &CurInfo) {
7080       BasePointers.append(CurInfo.BasePointers.begin(),
7081                           CurInfo.BasePointers.end());
7082       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7083       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7084       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7085       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7086     }
7087   };
7088 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Field index and address of the lowest mapped element. Starts out as
    /// index 0 with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest mapped element. Starts out as
    /// index 0 with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Starts out invalid.
    /// NOTE(review): presumably the address of the enclosing struct; an
    /// invalid address presumably means nothing was recorded yet -- confirm.
    Address Base = Address::invalid();
  };
7100 
7101 private:
  /// Bundles the data kept for one component list: the list itself, its map
  /// type, its map and motion modifiers, an optional user-defined mapper and
  /// a few flags.
  /// NOTE(review): ReturnDevicePointer / ForDeviceAddr presumably relate to
  /// the use_device_ptr / use_device_addr clauses -- confirm at the use sites.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    // The two modifier lists are ArrayRefs, i.e. non-owning views.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    /// Member-wise constructor; \a Mapper and \a ForDeviceAddr are optional.
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), ForDeviceAddr(ForDeviceAddr) {}
  };
7126 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression of the deferred entry.
    /// NOTE(review): presumably the clause operand naming the member --
    /// confirm against the code that creates these entries.
    const Expr *IE = nullptr;
    /// Declaration of the deferred entry.
    const ValueDecl *VD = nullptr;
    /// NOTE(review): presumably true for use_device_addr and false for
    /// use_device_ptr -- confirm.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7139 
  /// The target directive from which the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
7142   llvm::PointerUnion<const OMPExecutableDirective *,
7143                      const OMPDeclareMapperDecl *>
7144       CurDir;
7145 
7146   /// Function the directive is being generated for.
7147   CodeGenFunction &CGF;
7148 
7149   /// Set of all first private variables in the current directive.
7150   /// bool data is set to true if the variable is implicitly marked as
7151   /// firstprivate, false otherwise.
7152   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7153 
7154   /// Map between device pointer declarations and their expression components.
7155   /// The key value for declarations in 'this' is null.
7156   llvm::DenseMap<
7157       const ValueDecl *,
7158       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7159       DevPointersMap;
7160 
7161   llvm::Value *getExprTypeSize(const Expr *E) const {
7162     QualType ExprTy = E->getType().getCanonicalType();
7163 
7164     // Calculate the size for array shaping expression.
7165     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7166       llvm::Value *Size =
7167           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7168       for (const Expr *SE : OAE->getDimensions()) {
7169         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7170         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7171                                       CGF.getContext().getSizeType(),
7172                                       SE->getExprLoc());
7173         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7174       }
7175       return Size;
7176     }
7177 
7178     // Reference types are ignored for mapping purposes.
7179     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7180       ExprTy = RefTy->getPointeeType().getCanonicalType();
7181 
7182     // Given that an array section is considered a built-in type, we need to
7183     // do the calculation based on the length of the section instead of relying
7184     // on CGF.getTypeSize(E->getType()).
7185     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7186       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7187                             OAE->getBase()->IgnoreParenImpCasts())
7188                             .getCanonicalType();
7189 
7190       // If there is no length associated with the expression and lower bound is
7191       // not specified too, that means we are using the whole length of the
7192       // base.
7193       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7194           !OAE->getLowerBound())
7195         return CGF.getTypeSize(BaseTy);
7196 
7197       llvm::Value *ElemSize;
7198       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7199         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7200       } else {
7201         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7202         assert(ATy && "Expecting array type if not a pointer type.");
7203         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7204       }
7205 
7206       // If we don't have a length at this point, that is because we have an
7207       // array section with a single element.
7208       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7209         return ElemSize;
7210 
7211       if (const Expr *LenExpr = OAE->getLength()) {
7212         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7213         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7214                                              CGF.getContext().getSizeType(),
7215                                              LenExpr->getExprLoc());
7216         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7217       }
7218       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7219              OAE->getLowerBound() && "expected array_section[lb:].");
7220       // Size = sizetype - lb * elemtype;
7221       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7222       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7223       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7224                                        CGF.getContext().getSizeType(),
7225                                        OAE->getLowerBound()->getExprLoc());
7226       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7227       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7228       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7229       LengthVal = CGF.Builder.CreateSelect(
7230           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7231       return LengthVal;
7232     }
7233     return CGF.getTypeSize(ExprTy);
7234   }
7235 
7236   /// Return the corresponding bits for a given map clause modifier. Add
7237   /// a flag marking the map as a pointer if requested. Add a flag marking the
7238   /// map as the first one of a series of maps that relate to the same map
7239   /// expression.
7240   OpenMPOffloadMappingFlags getMapTypeBits(
7241       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7242       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7243       bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7244     OpenMPOffloadMappingFlags Bits =
7245         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7246     switch (MapType) {
7247     case OMPC_MAP_alloc:
7248     case OMPC_MAP_release:
7249       // alloc and release is the default behavior in the runtime library,  i.e.
7250       // if we don't pass any bits alloc/release that is what the runtime is
7251       // going to do. Therefore, we don't need to signal anything for these two
7252       // type modifiers.
7253       break;
7254     case OMPC_MAP_to:
7255       Bits |= OMP_MAP_TO;
7256       break;
7257     case OMPC_MAP_from:
7258       Bits |= OMP_MAP_FROM;
7259       break;
7260     case OMPC_MAP_tofrom:
7261       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7262       break;
7263     case OMPC_MAP_delete:
7264       Bits |= OMP_MAP_DELETE;
7265       break;
7266     case OMPC_MAP_unknown:
7267       llvm_unreachable("Unexpected map type!");
7268     }
7269     if (AddPtrFlag)
7270       Bits |= OMP_MAP_PTR_AND_OBJ;
7271     if (AddIsTargetParamFlag)
7272       Bits |= OMP_MAP_TARGET_PARAM;
7273     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7274         != MapModifiers.end())
7275       Bits |= OMP_MAP_ALWAYS;
7276     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7277         != MapModifiers.end())
7278       Bits |= OMP_MAP_CLOSE;
7279     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present)
7280         != MapModifiers.end())
7281       Bits |= OMP_MAP_PRESENT;
7282     if (llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present)
7283         != MotionModifiers.end())
7284       Bits |= OMP_MAP_PRESENT;
7285     return Bits;
7286   }
7287 
7288   /// Return true if the provided expression is a final array section. A
7289   /// final array section, is one whose length can't be proved to be one.
7290   bool isFinalArraySectionExpression(const Expr *E) const {
7291     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7292 
7293     // It is not an array section and therefore not a unity-size one.
7294     if (!OASE)
7295       return false;
7296 
7297     // An array section with no colon always refer to a single element.
7298     if (OASE->getColonLocFirst().isInvalid())
7299       return false;
7300 
7301     const Expr *Length = OASE->getLength();
7302 
7303     // If we don't have a length we have to check if the array has size 1
7304     // for this dimension. Also, we should always expect a length if the
7305     // base type is pointer.
7306     if (!Length) {
7307       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7308                              OASE->getBase()->IgnoreParenImpCasts())
7309                              .getCanonicalType();
7310       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7311         return ATy->getSize().getSExtValue() != 1;
7312       // If we don't have a constant dimension length, we have to consider
7313       // the current section as having any size, so it is not necessarily
7314       // unitary. If it happen to be unity size, that's user fault.
7315       return true;
7316     }
7317 
7318     // Check if the length evaluates to 1.
7319     Expr::EvalResult Result;
7320     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7321       return true; // Can have more that size 1.
7322 
7323     llvm::APSInt ConstLength = Result.Val.getInt();
7324     return ConstLength.getSExtValue() != 1;
7325   }
7326 
  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  ///
  /// \param MapType Map kind forwarded to getMapTypeBits() for every entry.
  /// \param MapModifiers Map modifiers forwarded to getMapTypeBits().
  /// \param MotionModifiers Motion modifiers forwarded to getMapTypeBits().
  /// \param Components Component list of the mapped expression; it is walked
  /// in reverse, i.e. from the base to the complete expression.
  /// \param CombinedInfo Receives the generated entries: BasePointers,
  /// Pointers, Sizes, Types and Mappers each get one element per entry.
  /// \param PartialStruct Updated with the base and the lowest/highest mapped
  /// elements whenever a member expression is part of the component list.
  /// \param IsFirstComponentList Seeds the "first info for this capture"
  /// state, which decides whether the target-parameter flag is requested.
  /// \param IsImplicit Forwarded to getMapTypeBits().
  /// \param Mapper User-defined mapper, if any; only the entry generated for
  /// the last component inherits it, all other entries get nullptr.
  /// \param ForDeviceAddr If true, a member component is handled like a member
  /// pointer, i.e. no separate entry is generated for it.
  /// \param OverlappedElements If non-empty, the last component is emitted as
  /// a sequence of MEMBER_OF entries that cover only the parts of the base
  /// element not overlapped by these component lists.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
      bool IsFirstComponentList, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    // Compute the initial base pointer (BP) from the first (base) component.
    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer, it
        // can be associated with the combined storage if shared memory mode is
        // active or the base declaration is not global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !VD || VD->hasLocalStorage())
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        // The base pointer component has been consumed; continue with the
        // component that follows it.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as complex pointer if this is actually not array-like
          // expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      // Note: OASE and OAShE below describe the current component and shadow
      // the variables of the same name computed for the base component above.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsNonDerefPointer = IsPointer && !UO && !BO;

      if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        // LB is the address of the current component (the section pointer of
        // the entry being generated).
        Address LB = Address::invalid();
        if (OAShE) {
          LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                       CGF.getContext().getTypeAlignInChars(
                           OAShE->getBase()->getType()));
        } else {
          LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                   .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            (IsPointer || ForDeviceAddr) && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          // HB addresses the last byte of the element (LB + size - 1).
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            // Use the first component that has an associated declaration to
            // determine where the overlapped element starts.
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress(CGF);
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.BasePointers.push_back(BP.getPointer());
            CombinedInfo.Pointers.push_back(LB.getPointer());
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            // The next non-overlapped chunk starts right after this element.
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          // Emit the trailing chunk, from the end of the last overlapped
          // element up to one past HB.
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointerOrAddr) {
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags =
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             !IsExpressionFirstInfo || RequiresReference ||
                                 FirstPointerInComplexData,
                             IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          CombinedInfo.Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LB};
            }
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
      }
    }
  }
7820 
7821   /// Return the adjusted map modifiers if the declaration a capture refers to
7822   /// appears in a first-private clause. This is expected to be used only with
7823   /// directives that start with 'target'.
7824   MappableExprsHandler::OpenMPOffloadMappingFlags
7825   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7826     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7827 
7828     // A first private variable captured by reference will use only the
7829     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7830     // declaration is known as first-private in this handler.
7831     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7832       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7833           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7834         return MappableExprsHandler::OMP_MAP_ALWAYS |
7835                MappableExprsHandler::OMP_MAP_TO;
7836       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7837         return MappableExprsHandler::OMP_MAP_TO |
7838                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7839       return MappableExprsHandler::OMP_MAP_PRIVATE |
7840              MappableExprsHandler::OMP_MAP_TO;
7841     }
7842     return MappableExprsHandler::OMP_MAP_TO |
7843            MappableExprsHandler::OMP_MAP_FROM;
7844   }
7845 
7846   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7847     // Rotate by getFlagMemberOffset() bits.
7848     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7849                                                   << getFlagMemberOffset());
7850   }
7851 
7852   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7853                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7854     // If the entry is PTR_AND_OBJ but has not been marked with the special
7855     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7856     // marked as MEMBER_OF.
7857     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7858         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7859       return;
7860 
7861     // Reset the placeholder value to prepare the flag for the assignment of the
7862     // proper MEMBER_OF value.
7863     Flags &= ~OMP_MAP_MEMBER_OF;
7864     Flags |= MemberOfFlag;
7865   }
7866 
7867   void getPlainLayout(const CXXRecordDecl *RD,
7868                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7869                       bool AsBase) const {
7870     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7871 
7872     llvm::StructType *St =
7873         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7874 
7875     unsigned NumElements = St->getNumElements();
7876     llvm::SmallVector<
7877         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7878         RecordLayout(NumElements);
7879 
7880     // Fill bases.
7881     for (const auto &I : RD->bases()) {
7882       if (I.isVirtual())
7883         continue;
7884       const auto *Base = I.getType()->getAsCXXRecordDecl();
7885       // Ignore empty bases.
7886       if (Base->isEmpty() || CGF.getContext()
7887                                  .getASTRecordLayout(Base)
7888                                  .getNonVirtualSize()
7889                                  .isZero())
7890         continue;
7891 
7892       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7893       RecordLayout[FieldIndex] = Base;
7894     }
7895     // Fill in virtual bases.
7896     for (const auto &I : RD->vbases()) {
7897       const auto *Base = I.getType()->getAsCXXRecordDecl();
7898       // Ignore empty bases.
7899       if (Base->isEmpty())
7900         continue;
7901       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7902       if (RecordLayout[FieldIndex])
7903         continue;
7904       RecordLayout[FieldIndex] = Base;
7905     }
7906     // Fill in all the fields.
7907     assert(!RD->isUnion() && "Unexpected union.");
7908     for (const auto *Field : RD->fields()) {
7909       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7910       // will fill in later.)
7911       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7912         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7913         RecordLayout[FieldIndex] = Field;
7914       }
7915     }
7916     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7917              &Data : RecordLayout) {
7918       if (Data.isNull())
7919         continue;
7920       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7921         getPlainLayout(Base, Layout, /*AsBase=*/true);
7922       else
7923         Layout.push_back(Data.get<const FieldDecl *>());
7924     }
7925   }
7926 
7927 public:
7928   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7929       : CurDir(&Dir), CGF(CGF) {
7930     // Extract firstprivate clause information.
7931     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7932       for (const auto *D : C->varlists())
7933         FirstPrivateDecls.try_emplace(
7934             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7935     // Extract implicit firstprivates from uses_allocators clauses.
7936     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
7937       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
7938         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
7939         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
7940           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
7941                                         /*Implicit=*/true);
7942         else if (const auto *VD = dyn_cast<VarDecl>(
7943                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
7944                          ->getDecl()))
7945           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
7946       }
7947     }
7948     // Extract device pointer clause information.
7949     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7950       for (auto L : C->component_lists())
7951         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
7952   }
7953 
  /// Constructor for the declare mapper directive. Only records the
  /// declaration in CurDir and the code generation function in CGF; unlike the
  /// constructor for executable directives, no clause information is
  /// extracted here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
7957 
7958   /// Generate code for the combined entry if we have a partially mapped struct
7959   /// and take care of the mapping flags of the arguments corresponding to
7960   /// individual struct members.
7961   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
7962                          MapFlagsArrayTy &CurTypes,
7963                          const StructRangeInfoTy &PartialStruct,
7964                          bool NotTargetParams = false) const {
7965     // Base is the base of the struct
7966     CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
7967     // Pointer is the address of the lowest element
7968     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7969     CombinedInfo.Pointers.push_back(LB);
7970     // There should not be a mapper for a combined entry.
7971     CombinedInfo.Mappers.push_back(nullptr);
7972     // Size is (addr of {highest+1} element) - (addr of lowest element)
7973     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7974     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7975     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7976     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7977     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7978     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7979                                                   /*isSigned=*/false);
7980     CombinedInfo.Sizes.push_back(Size);
7981     // Map type is always TARGET_PARAM, if generate info for captures.
7982     CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
7983                                                  : OMP_MAP_TARGET_PARAM);
7984     // If any element has the present modifier, then make sure the runtime
7985     // doesn't attempt to allocate the struct.
7986     if (CurTypes.end() !=
7987         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
7988           return Type & OMP_MAP_PRESENT;
7989         }))
7990       CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
7991     // Remove TARGET_PARAM flag from the first element
7992     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7993 
7994     // All other current entries will be MEMBER_OF the combined entry
7995     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7996     // 0xFFFF in the MEMBER_OF field).
7997     OpenMPOffloadMappingFlags MemberOfFlag =
7998         getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
7999     for (auto &M : CurTypes)
8000       setCorrectMemberOfFlag(M, MemberOfFlag);
8001   }
8002 
8003   /// Generate all the base pointers, section pointers, sizes, map types, and
8004   /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates to a device pointer, a
8006   /// pair of the relevant declaration and index where it occurs is appended to
8007   /// the device pointers info array.
  /// \param CombinedInfo Output. All entries generated for the current
  /// executable directive are appended to it.
  /// \param NotTargetParams If true, entries created here use OMP_MAP_NONE
  /// where they would otherwise carry OMP_MAP_TARGET_PARAM, and no component
  /// list is treated as the first one of its declaration.
  /// \param SkipVarSet Declarations for which no component list is recorded by
  /// the InfoGen helper below.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo, bool NotTargetParams = false,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate to the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map keyed by the
    // canonical declaration (nullptr when there is no declaration, i.e. for
    // lists rooted at 'this').
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Lists whose declaration is in SkipVarSet are dropped.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            bool ForDeviceAddr = false) {
          const ValueDecl *VD =
              D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
          if (SkipVarSet.count(VD))
            return;
          Info[VD].emplace_back(L, MapType, MapModifiers, MotionModifiers,
                                ReturnDevicePointer, IsImplicit, Mapper,
                                ForDeviceAddr);
        };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect the lists of 'map' clauses; they carry their own map type and
    // map-type modifiers but no motion modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L));
      }
    // 'to' clauses are recorded with map type 'to' and their motion modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L));
      }
    // 'from' clauses are recorded with map type 'from' and their motion
    // modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L));
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    // Entries for non-member use_device_ptr items without map information.
    // They are appended to CombinedInfo at the very end of this function.
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          // Exclude cases where the base pointer is mapped as array subscript,
          // array section or array shaping. The base address is passed as a
          // pointer to base in this case and cannot be used as a base for
          // use_device_ptr list item.
          if (CI != It->second.end()) {
            // PrevCI is the component that follows the base component when
            // walking the list from its back (rend() if there is none).
            auto PrevCI = std::next(CI->Components.rbegin());
            const auto *VarD = dyn_cast<VarDecl>(VD);
            if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                isa<MemberExpr>(IE) ||
                !VD->getType().getNonReferenceType()->isPointerType() ||
                PrevCI == CI->Components.rend() ||
                isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                VarD->hasLocalStorage()) {
              CI->ReturnDevicePointer = true;
              continue;
            }
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          // The loaded pointer value serves both as base pointer and as
          // pointer of the zero-sized RETURN_PARAM entry.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(
              OMP_MAP_RETURN_PARAM |
              (NotTargetParams ? OMP_MAP_NONE : OMP_MAP_TARGET_PARAM));
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    // Each declaration is handled at most once, tracked by Processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          // Unlike use_device_ptr, the address of the item itself is used
          // (or its scalar value if the expression is not a glvalue), and the
          // entry goes straight into CombinedInfo.
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(
              OMP_MAP_RETURN_PARAM |
              (NotTargetParams ? OMP_MAP_NONE : OMP_MAP_TARGET_PARAM));
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Generate the entries declaration by declaration, in the order in which
    // the declarations were first recorded in Info.
    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = !NotTargetParams;

      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
        generateInfoForComponentList(L.MapType, L.MapModifiers,
                                     L.MotionModifiers, L.Components, CurInfo,
                                     PartialStruct, IsFirstComponentList,
                                     L.IsImplicit, L.Mapper, L.ForDeviceAddr);

        // If this entry relates to a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
              RelevantVD);
          CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            // NOTE(review): L.IE is emitted as an lvalue twice here, once for
            // the base pointer and once for the load of the pointer value.
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
            // value MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry. It is added
      // to CombinedInfo before the member entries appended below.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
                          NotTargetParams);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }
8294 
8295   /// Generate all the base pointers, section pointers, sizes, map types, and
8296   /// mappers for the extracted map clauses of user-defined mapper (all included
8297   /// in \a CombinedInfo).
8298   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8299     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8300            "Expect a declare mapper directive");
8301     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8302     // We have to process the component lists that relate with the same
8303     // declaration in a single chunk so that we can generate the map flags
8304     // correctly. Therefore, we organize all lists in a map.
8305     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8306 
8307     // Fill the information map for map clauses.
8308     for (const auto *C : CurMapperDir->clauselists()) {
8309       const auto *MC = cast<OMPMapClause>(C);
8310       for (const auto L : MC->component_lists()) {
8311         const ValueDecl *VD =
8312             std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl())
8313                            : nullptr;
8314         // Get the corresponding user-defined mapper.
8315         Info[VD].emplace_back(std::get<1>(L), MC->getMapType(),
8316                               MC->getMapTypeModifiers(), llvm::None,
8317                               /*ReturnDevicePointer=*/false, MC->isImplicit(),
8318                               std::get<2>(L));
8319       }
8320     }
8321 
8322     for (const auto &M : Info) {
8323       // We need to know when we generate information for the first component
8324       // associated with a capture, because the mapping flags depend on it.
8325       bool IsFirstComponentList = true;
8326 
8327       // Temporary generated information.
8328       MapCombinedInfoTy CurInfo;
8329       StructRangeInfoTy PartialStruct;
8330 
8331       for (const MapInfo &L : M.second) {
8332         assert(!L.Components.empty() &&
8333                "Not expecting declaration with no component lists.");
8334         generateInfoForComponentList(L.MapType, L.MapModifiers,
8335                                      L.MotionModifiers, L.Components, CurInfo,
8336                                      PartialStruct, IsFirstComponentList,
8337                                      L.IsImplicit, L.Mapper, L.ForDeviceAddr);
8338         IsFirstComponentList = false;
8339       }
8340 
8341       // If there is an entry in PartialStruct it means we have a struct with
8342       // individual members mapped. Emit an extra combined entry.
8343       if (PartialStruct.Base.isValid())
8344         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);
8345 
8346       // We need to append the results of this capture to what we already have.
8347       CombinedInfo.append(CurInfo);
8348     }
8349   }
8350 
8351   /// Emit capture info for lambdas for variables captured by reference.
8352   void generateInfoForLambdaCaptures(
8353       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8354       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8355     const auto *RD = VD->getType()
8356                          .getCanonicalType()
8357                          .getNonReferenceType()
8358                          ->getAsCXXRecordDecl();
8359     if (!RD || !RD->isLambda())
8360       return;
8361     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8362     LValue VDLVal = CGF.MakeAddrLValue(
8363         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8364     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8365     FieldDecl *ThisCapture = nullptr;
8366     RD->getCaptureFields(Captures, ThisCapture);
8367     if (ThisCapture) {
8368       LValue ThisLVal =
8369           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8370       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8371       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8372                                  VDLVal.getPointer(CGF));
8373       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8374       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8375       CombinedInfo.Sizes.push_back(
8376           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8377                                     CGF.Int64Ty, /*isSigned=*/true));
8378       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8379                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8380       CombinedInfo.Mappers.push_back(nullptr);
8381     }
8382     for (const LambdaCapture &LC : RD->captures()) {
8383       if (!LC.capturesVariable())
8384         continue;
8385       const VarDecl *VD = LC.getCapturedVar();
8386       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8387         continue;
8388       auto It = Captures.find(VD);
8389       assert(It != Captures.end() && "Found lambda capture without field.");
8390       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8391       if (LC.getCaptureKind() == LCK_ByRef) {
8392         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8393         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8394                                    VDLVal.getPointer(CGF));
8395         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8396         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8397         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8398             CGF.getTypeSize(
8399                 VD->getType().getCanonicalType().getNonReferenceType()),
8400             CGF.Int64Ty, /*isSigned=*/true));
8401       } else {
8402         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8403         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8404                                    VDLVal.getPointer(CGF));
8405         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8406         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8407         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8408       }
8409       CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8410                                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8411       CombinedInfo.Mappers.push_back(nullptr);
8412     }
8413   }
8414 
  /// Set the correct MEMBER_OF indices for lambda captures.
8416   void adjustMemberOfForLambdaCaptures(
8417       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8418       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8419       MapFlagsArrayTy &Types) const {
8420     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8421       // Set correct member_of idx for all implicit lambda captures.
8422       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8423                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8424         continue;
8425       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8426       assert(BasePtr && "Unable to find base lambda address.");
8427       int TgtIdx = -1;
8428       for (unsigned J = I; J > 0; --J) {
8429         unsigned Idx = J - 1;
8430         if (Pointers[Idx] != BasePtr)
8431           continue;
8432         TgtIdx = Idx;
8433         break;
8434       }
8435       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8436       // All other current entries will be MEMBER_OF the combined entry
8437       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8438       // 0xFFFF in the MEMBER_OF field).
8439       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8440       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8441     }
8442   }
8443 
8444   /// Generate the base pointers, section pointers, sizes, map types, and
8445   /// mappers associated to a given capture (all included in \a CombinedInfo).
  /// \param Cap The capture to generate map information for. Must not capture
  /// a variable array type.
  /// \param Arg Value used as both base pointer and pointer when the captured
  /// declaration is found in DevPointersMap.
  /// \param CombinedInfo Output. Generated entries are appended to it.
  /// \param PartialStruct Forwarded to generateInfoForComponentList for every
  /// component list of the capture.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // The declaration the capture refers to; nullptr stands for a captured
    // 'this'.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // One record per component list of VD: the components, the map type, the
    // map-type modifiers, the implicit flag and the user-defined mapper.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper);
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // The key is the list that is a prefix of the others (the base); the value
    // collects the longer lists that extend it.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
      ++Count;
      // Compare L only against the lists that come after it, so that every
      // pair is inspected once.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // Only Components1 is needed from L1; the other locals are overwritten
        // here but are not read again in this loop.
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper) = L1;
        // Walk both lists from their back while the components agree in
        // expression class and associated declaration.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The list that was exhausted is the base; the other one is the
          // overlapped sub-element.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // Layout holds the fields of VD's record type and is used to order fields
    // that do not share a parent.
    // NOTE(review): VD is nullptr for a captured 'this' and is dereferenced
    // here, and RD is used without a null check. This presumably relies on
    // overlapped lists only occurring for variables of record type -- TODO
    // confirm.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the common part of both lists, as done above when looking
            // for overlaps.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // The lists diverge at two different fields: order them by field
            // index if they have the same parent, otherwise by whichever of
            // them comes first in Layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // NOTE(review): assumes at least one of FD1/FD2 is present in
            // Layout; the iterator is dereferenced without being compared to
            // Layout.end().
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // We need to know when we are generating information for the first
    // component list associated with a capture, because the mapping flags
    // depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      // Every base list with overlapped elements is generated as a first
      // component list.
      bool IsFirstComponentList = true;
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    // If there was no overlapped data at all, the first list visited here is
    // passed as the first component list; the flag is cleared after the first
    // iteration even if that list was skipped.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L;
      // Base lists were already generated together with their overlapped
      // elements above.
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper);
      IsFirstComponentList = false;
    }
  }
8626 
8627   /// Generate the default map information for a given capture \a CI,
8628   /// record field declaration \a RI and captured value \a CV.
8629   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8630                               const FieldDecl &RI, llvm::Value *CV,
8631                               MapCombinedInfoTy &CombinedInfo) const {
8632     bool IsImplicit = true;
8633     // Do the default mapping.
8634     if (CI.capturesThis()) {
8635       CombinedInfo.BasePointers.push_back(CV);
8636       CombinedInfo.Pointers.push_back(CV);
8637       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8638       CombinedInfo.Sizes.push_back(
8639           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8640                                     CGF.Int64Ty, /*isSigned=*/true));
8641       // Default map type.
8642       CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8643     } else if (CI.capturesVariableByCopy()) {
8644       CombinedInfo.BasePointers.push_back(CV);
8645       CombinedInfo.Pointers.push_back(CV);
8646       if (!RI.getType()->isAnyPointerType()) {
8647         // We have to signal to the runtime captures passed by value that are
8648         // not pointers.
8649         CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
8650         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8651             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8652       } else {
8653         // Pointers are implicitly mapped with a zero size and no flags
8654         // (other than first map that is added for all implicit maps).
8655         CombinedInfo.Types.push_back(OMP_MAP_NONE);
8656         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8657       }
8658       const VarDecl *VD = CI.getCapturedVar();
8659       auto I = FirstPrivateDecls.find(VD);
8660       if (I != FirstPrivateDecls.end())
8661         IsImplicit = I->getSecond();
8662     } else {
8663       assert(CI.capturesVariable() && "Expected captured reference.");
8664       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8665       QualType ElementType = PtrTy->getPointeeType();
8666       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8667           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8668       // The default map type for a scalar/complex type is 'to' because by
8669       // default the value doesn't have to be retrieved. For an aggregate
8670       // type, the default is 'tofrom'.
8671       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8672       const VarDecl *VD = CI.getCapturedVar();
8673       auto I = FirstPrivateDecls.find(VD);
8674       if (I != FirstPrivateDecls.end() &&
8675           VD->getType().isConstant(CGF.getContext())) {
8676         llvm::Constant *Addr =
8677             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8678         // Copy the value of the original variable to the new global copy.
8679         CGF.Builder.CreateMemCpy(
8680             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
8681             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8682             CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
8683         // Use new global variable as the base pointers.
8684         CombinedInfo.BasePointers.push_back(Addr);
8685         CombinedInfo.Pointers.push_back(Addr);
8686       } else {
8687         CombinedInfo.BasePointers.push_back(CV);
8688         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8689           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8690               CV, ElementType, CGF.getContext().getDeclAlign(VD),
8691               AlignmentSource::Decl));
8692           CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
8693         } else {
8694           CombinedInfo.Pointers.push_back(CV);
8695         }
8696       }
8697       if (I != FirstPrivateDecls.end())
8698         IsImplicit = I->getSecond();
8699     }
8700     // Every default map produces a single argument which is a target parameter.
8701     CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
8702 
8703     // Add flag stating this is an implicit map.
8704     if (IsImplicit)
8705       CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
8706 
8707     // No user-defined mapper for default mapping.
8708     CombinedInfo.Mappers.push_back(nullptr);
8709   }
8710 };
8711 } // anonymous namespace
8712 
8713 /// Emit the arrays used to pass the captures and map information to the
8714 /// offloading runtime library. If there is no map or capture information,
8715 /// return nullptr by reference.
8716 static void
8717 emitOffloadingArrays(CodeGenFunction &CGF,
8718                      MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
8719                      CGOpenMPRuntime::TargetDataInfo &Info) {
8720   CodeGenModule &CGM = CGF.CGM;
8721   ASTContext &Ctx = CGF.getContext();
8722 
8723   // Reset the array information.
8724   Info.clearArrayInfo();
8725   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
8726 
8727   if (Info.NumberOfPtrs) {
8728     // Detect if we have any capture size requiring runtime evaluation of the
8729     // size so that a constant array could be eventually used.
8730     bool hasRuntimeEvaluationCaptureSize = false;
8731     for (llvm::Value *S : CombinedInfo.Sizes)
8732       if (!isa<llvm::Constant>(S)) {
8733         hasRuntimeEvaluationCaptureSize = true;
8734         break;
8735       }
8736 
8737     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8738     QualType PointerArrayType = Ctx.getConstantArrayType(
8739         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
8740         /*IndexTypeQuals=*/0);
8741 
8742     Info.BasePointersArray =
8743         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8744     Info.PointersArray =
8745         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8746     Address MappersArray =
8747         CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
8748     Info.MappersArray = MappersArray.getPointer();
8749 
8750     // If we don't have any VLA types or other types that require runtime
8751     // evaluation, we can use a constant array for the map sizes, otherwise we
8752     // need to fill up the arrays as we do for the pointers.
8753     QualType Int64Ty =
8754         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8755     if (hasRuntimeEvaluationCaptureSize) {
8756       QualType SizeArrayType = Ctx.getConstantArrayType(
8757           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
8758           /*IndexTypeQuals=*/0);
8759       Info.SizesArray =
8760           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8761     } else {
8762       // We expect all the sizes to be constant, so we collect them to create
8763       // a constant array.
8764       SmallVector<llvm::Constant *, 16> ConstSizes;
8765       for (llvm::Value *S : CombinedInfo.Sizes)
8766         ConstSizes.push_back(cast<llvm::Constant>(S));
8767 
8768       auto *SizesArrayInit = llvm::ConstantArray::get(
8769           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8770       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8771       auto *SizesArrayGbl = new llvm::GlobalVariable(
8772           CGM.getModule(), SizesArrayInit->getType(),
8773           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8774           SizesArrayInit, Name);
8775       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8776       Info.SizesArray = SizesArrayGbl;
8777     }
8778 
8779     // The map types are always constant so we don't need to generate code to
8780     // fill arrays. Instead, we create an array constant.
8781     SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
8782     llvm::copy(CombinedInfo.Types, Mapping.begin());
8783     llvm::Constant *MapTypesArrayInit =
8784         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8785     std::string MaptypesName =
8786         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8787     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8788         CGM.getModule(), MapTypesArrayInit->getType(),
8789         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8790         MapTypesArrayInit, MaptypesName);
8791     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8792     Info.MapTypesArray = MapTypesArrayGbl;
8793 
8794     // If there's a present map type modifier, it must not be applied to the end
8795     // of a region, so generate a separate map type array in that case.
8796     if (Info.separateBeginEndCalls()) {
8797       bool EndMapTypesDiffer = false;
8798       for (uint64_t &Type : Mapping) {
8799         if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
8800           Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
8801           EndMapTypesDiffer = true;
8802         }
8803       }
8804       if (EndMapTypesDiffer) {
8805         MapTypesArrayInit =
8806             llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8807         MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8808         MapTypesArrayGbl = new llvm::GlobalVariable(
8809             CGM.getModule(), MapTypesArrayInit->getType(),
8810             /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8811             MapTypesArrayInit, MaptypesName);
8812         MapTypesArrayGbl->setUnnamedAddr(
8813             llvm::GlobalValue::UnnamedAddr::Global);
8814         Info.MapTypesArrayEnd = MapTypesArrayGbl;
8815       }
8816     }
8817 
8818     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8819       llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
8820       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8821           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8822           Info.BasePointersArray, 0, I);
8823       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8824           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8825       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8826       CGF.Builder.CreateStore(BPVal, BPAddr);
8827 
8828       if (Info.requiresDevicePointerInfo())
8829         if (const ValueDecl *DevVD =
8830                 CombinedInfo.BasePointers[I].getDevicePtrDecl())
8831           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8832 
8833       llvm::Value *PVal = CombinedInfo.Pointers[I];
8834       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8835           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8836           Info.PointersArray, 0, I);
8837       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8838           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8839       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8840       CGF.Builder.CreateStore(PVal, PAddr);
8841 
8842       if (hasRuntimeEvaluationCaptureSize) {
8843         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8844             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8845             Info.SizesArray,
8846             /*Idx0=*/0,
8847             /*Idx1=*/I);
8848         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8849         CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
8850                                                           CGM.Int64Ty,
8851                                                           /*isSigned=*/true),
8852                                 SAddr);
8853       }
8854 
8855       // Fill up the mapper array.
8856       llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
8857       if (CombinedInfo.Mappers[I]) {
8858         MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
8859             cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
8860         MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
8861         Info.HasMapper = true;
8862       }
8863       Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
8864       CGF.Builder.CreateStore(MFunc, MAddr);
8865     }
8866   }
8867 }
8868 
namespace {
/// Optional settings accepted by emitOffloadingArraysArgument; both flags
/// are off by default.
struct ArgumentsOptions {
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall, bool IsTask)
      : ForEndCall{ForEndCall}, IsTask{IsTask} {}

  /// Request the map types meant for the end of the region rather than the
  /// ones for its beginning.
  bool ForEndCall{false};
  /// The arguments are emitted for a target task.
  bool IsTask{false};
};
} // namespace
8879 
8880 /// Emit the arguments to be passed to the runtime library based on the
8881 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
8882 /// ForEndCall, emit map types to be passed for the end of the region instead of
8883 /// the beginning.
8884 static void emitOffloadingArraysArgument(
8885     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8886     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8887     llvm::Value *&MapTypesArrayArg, llvm::Value *&MappersArrayArg,
8888     CGOpenMPRuntime::TargetDataInfo &Info,
8889     const ArgumentsOptions &Options = ArgumentsOptions()) {
8890   assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
8891          "expected region end call to runtime only when end call is separate");
8892   CodeGenModule &CGM = CGF.CGM;
8893   if (Info.NumberOfPtrs) {
8894     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8895         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8896         Info.BasePointersArray,
8897         /*Idx0=*/0, /*Idx1=*/0);
8898     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8899         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8900         Info.PointersArray,
8901         /*Idx0=*/0,
8902         /*Idx1=*/0);
8903     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8904         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8905         /*Idx0=*/0, /*Idx1=*/0);
8906     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8907         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8908         Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
8909                                                     : Info.MapTypesArray,
8910         /*Idx0=*/0,
8911         /*Idx1=*/0);
8912     // Always emit the mapper array address in case of a target task for
8913     // privatization.
8914     if (!Options.IsTask && !Info.HasMapper)
8915       MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8916     else
8917       MappersArrayArg =
8918           CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
8919   } else {
8920     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8921     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8922     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8923     MapTypesArrayArg =
8924         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8925     MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8926   }
8927 }
8928 
8929 /// Check for inner distribute directive.
8930 static const OMPExecutableDirective *
8931 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8932   const auto *CS = D.getInnermostCapturedStmt();
8933   const auto *Body =
8934       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8935   const Stmt *ChildStmt =
8936       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8937 
8938   if (const auto *NestedDir =
8939           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8940     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8941     switch (D.getDirectiveKind()) {
8942     case OMPD_target:
8943       if (isOpenMPDistributeDirective(DKind))
8944         return NestedDir;
8945       if (DKind == OMPD_teams) {
8946         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8947             /*IgnoreCaptured=*/true);
8948         if (!Body)
8949           return nullptr;
8950         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8951         if (const auto *NND =
8952                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8953           DKind = NND->getDirectiveKind();
8954           if (isOpenMPDistributeDirective(DKind))
8955             return NND;
8956         }
8957       }
8958       return nullptr;
8959     case OMPD_target_teams:
8960       if (isOpenMPDistributeDirective(DKind))
8961         return NestedDir;
8962       return nullptr;
8963     case OMPD_target_parallel:
8964     case OMPD_target_simd:
8965     case OMPD_target_parallel_for:
8966     case OMPD_target_parallel_for_simd:
8967       return nullptr;
8968     case OMPD_target_teams_distribute:
8969     case OMPD_target_teams_distribute_simd:
8970     case OMPD_target_teams_distribute_parallel_for:
8971     case OMPD_target_teams_distribute_parallel_for_simd:
8972     case OMPD_parallel:
8973     case OMPD_for:
8974     case OMPD_parallel_for:
8975     case OMPD_parallel_master:
8976     case OMPD_parallel_sections:
8977     case OMPD_for_simd:
8978     case OMPD_parallel_for_simd:
8979     case OMPD_cancel:
8980     case OMPD_cancellation_point:
8981     case OMPD_ordered:
8982     case OMPD_threadprivate:
8983     case OMPD_allocate:
8984     case OMPD_task:
8985     case OMPD_simd:
8986     case OMPD_sections:
8987     case OMPD_section:
8988     case OMPD_single:
8989     case OMPD_master:
8990     case OMPD_critical:
8991     case OMPD_taskyield:
8992     case OMPD_barrier:
8993     case OMPD_taskwait:
8994     case OMPD_taskgroup:
8995     case OMPD_atomic:
8996     case OMPD_flush:
8997     case OMPD_depobj:
8998     case OMPD_scan:
8999     case OMPD_teams:
9000     case OMPD_target_data:
9001     case OMPD_target_exit_data:
9002     case OMPD_target_enter_data:
9003     case OMPD_distribute:
9004     case OMPD_distribute_simd:
9005     case OMPD_distribute_parallel_for:
9006     case OMPD_distribute_parallel_for_simd:
9007     case OMPD_teams_distribute:
9008     case OMPD_teams_distribute_simd:
9009     case OMPD_teams_distribute_parallel_for:
9010     case OMPD_teams_distribute_parallel_for_simd:
9011     case OMPD_target_update:
9012     case OMPD_declare_simd:
9013     case OMPD_declare_variant:
9014     case OMPD_begin_declare_variant:
9015     case OMPD_end_declare_variant:
9016     case OMPD_declare_target:
9017     case OMPD_end_declare_target:
9018     case OMPD_declare_reduction:
9019     case OMPD_declare_mapper:
9020     case OMPD_taskloop:
9021     case OMPD_taskloop_simd:
9022     case OMPD_master_taskloop:
9023     case OMPD_master_taskloop_simd:
9024     case OMPD_parallel_master_taskloop:
9025     case OMPD_parallel_master_taskloop_simd:
9026     case OMPD_requires:
9027     case OMPD_unknown:
9028     default:
9029       llvm_unreachable("Unexpected directive.");
9030     }
9031   }
9032 
9033   return nullptr;
9034 }
9035 
9036 /// Emit the user-defined mapper function. The code generation follows the
9037 /// pattern in the example below.
9038 /// \code
9039 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9040 ///                                           void *base, void *begin,
9041 ///                                           int64_t size, int64_t type) {
9042 ///   // Allocate space for an array section first.
9043 ///   if (size > 1 && !maptype.IsDelete)
9044 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9045 ///                                 size*sizeof(Ty), clearToFrom(type));
9046 ///   // Map members.
9047 ///   for (unsigned i = 0; i < size; i++) {
9048 ///     // For each component specified by this mapper:
9049 ///     for (auto c : all_components) {
9050 ///       if (c.hasMapper())
9051 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9052 ///                       c.arg_type);
9053 ///       else
9054 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9055 ///                                     c.arg_begin, c.arg_size, c.arg_type);
9056 ///     }
9057 ///   }
9058 ///   // Delete the array section.
9059 ///   if (size > 1 && maptype.IsDelete)
9060 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9061 ///                                 size*sizeof(Ty), clearToFrom(type));
9062 /// }
9063 /// \endcode
9064 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9065                                             CodeGenFunction *CGF) {
9066   if (UDMMap.count(D) > 0)
9067     return;
9068   ASTContext &C = CGM.getContext();
9069   QualType Ty = D->getType();
9070   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9071   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9072   auto *MapperVarDecl =
9073       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9074   SourceLocation Loc = D->getLocation();
9075   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9076 
9077   // Prepare mapper function arguments and attributes.
9078   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9079                               C.VoidPtrTy, ImplicitParamDecl::Other);
9080   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9081                             ImplicitParamDecl::Other);
9082   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9083                              C.VoidPtrTy, ImplicitParamDecl::Other);
9084   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9085                             ImplicitParamDecl::Other);
9086   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9087                             ImplicitParamDecl::Other);
9088   FunctionArgList Args;
9089   Args.push_back(&HandleArg);
9090   Args.push_back(&BaseArg);
9091   Args.push_back(&BeginArg);
9092   Args.push_back(&SizeArg);
9093   Args.push_back(&TypeArg);
9094   const CGFunctionInfo &FnInfo =
9095       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9096   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9097   SmallString<64> TyStr;
9098   llvm::raw_svector_ostream Out(TyStr);
9099   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9100   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9101   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9102                                     Name, &CGM.getModule());
9103   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9104   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9105   // Start the mapper function code generation.
9106   CodeGenFunction MapperCGF(CGM);
9107   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
9109   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9110       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9111       C.getPointerType(Int64Ty), Loc);
9112   // Convert the size in bytes into the number of array elements.
9113   Size = MapperCGF.Builder.CreateExactUDiv(
9114       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9115   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9116       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
9117       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
9118   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
9119   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9120       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9121       C.getPointerType(Int64Ty), Loc);
9122   // Prepare common arguments for array initiation and deletion.
9123   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9124       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9125       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9126   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9127       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9128       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9129   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9130       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9131       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9132 
9133   // Emit array initiation if this is an array section and \p MapType indicates
9134   // that memory allocation is required.
9135   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9136   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9137                              ElementSize, HeadBB, /*IsInit=*/true);
9138 
9139   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9140 
9141   // Emit the loop header block.
9142   MapperCGF.EmitBlock(HeadBB);
9143   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9144   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9145   // Evaluate whether the initial condition is satisfied.
9146   llvm::Value *IsEmpty =
9147       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9148   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9149   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9150 
9151   // Emit the loop body block.
9152   MapperCGF.EmitBlock(BodyBB);
9153   llvm::BasicBlock *LastBB = BodyBB;
9154   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9155       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9156   PtrPHI->addIncoming(PtrBegin, EntryBB);
9157   Address PtrCurrent =
9158       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9159                           .getAlignment()
9160                           .alignmentOfArrayElement(ElementSize));
9161   // Privatize the declared variable of mapper to be the current array element.
9162   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9163   Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
9164     return MapperCGF
9165         .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
9166         .getAddress(MapperCGF);
9167   });
9168   (void)Scope.Privatize();
9169 
9170   // Get map clause information. Fill up the arrays with all mapped variables.
9171   MappableExprsHandler::MapCombinedInfoTy Info;
9172   MappableExprsHandler MEHandler(*D, MapperCGF);
9173   MEHandler.generateAllInfoForMapper(Info);
9174 
9175   // Call the runtime API __tgt_mapper_num_components to get the number of
9176   // pre-existing components.
9177   llvm::Value *OffloadingArgs[] = {Handle};
9178   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9179       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9180                                             OMPRTL___tgt_mapper_num_components),
9181       OffloadingArgs);
9182   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9183       PreviousSize,
9184       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9185 
9186   // Fill up the runtime mapper handle for all components.
9187   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9188     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9189         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9190     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9191         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9192     llvm::Value *CurSizeArg = Info.Sizes[I];
9193 
9194     // Extract the MEMBER_OF field from the map type.
9195     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
9196     MapperCGF.EmitBlock(MemberBB);
9197     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
9198     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
9199         OriMapType,
9200         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
9201     llvm::BasicBlock *MemberCombineBB =
9202         MapperCGF.createBasicBlock("omp.member.combine");
9203     llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
9204     llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
9205     MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
9206     // Add the number of pre-existing components to the MEMBER_OF field if it
9207     // is valid.
9208     MapperCGF.EmitBlock(MemberCombineBB);
9209     llvm::Value *CombinedMember =
9210         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9211     // Do nothing if it is not a member of previous components.
9212     MapperCGF.EmitBlock(TypeBB);
9213     llvm::PHINode *MemberMapType =
9214         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
9215     MemberMapType->addIncoming(OriMapType, MemberBB);
9216     MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
9217 
9218     // Combine the map type inherited from user-defined mapper with that
9219     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9220     // bits of the \a MapType, which is the input argument of the mapper
9221     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9222     // bits of MemberMapType.
9223     // [OpenMP 5.0], 1.2.6. map-type decay.
9224     //        | alloc |  to   | from  | tofrom | release | delete
9225     // ----------------------------------------------------------
9226     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9227     // to     | alloc |  to   | alloc |   to   | release | delete
9228     // from   | alloc | alloc | from  |  from  | release | delete
9229     // tofrom | alloc |  to   | from  | tofrom | release | delete
9230     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9231         MapType,
9232         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9233                                    MappableExprsHandler::OMP_MAP_FROM));
9234     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9235     llvm::BasicBlock *AllocElseBB =
9236         MapperCGF.createBasicBlock("omp.type.alloc.else");
9237     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9238     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9239     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9240     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9241     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9242     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9243     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9244     MapperCGF.EmitBlock(AllocBB);
9245     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9246         MemberMapType,
9247         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9248                                      MappableExprsHandler::OMP_MAP_FROM)));
9249     MapperCGF.Builder.CreateBr(EndBB);
9250     MapperCGF.EmitBlock(AllocElseBB);
9251     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9252         LeftToFrom,
9253         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9254     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9255     // In case of to, clear OMP_MAP_FROM.
9256     MapperCGF.EmitBlock(ToBB);
9257     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9258         MemberMapType,
9259         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9260     MapperCGF.Builder.CreateBr(EndBB);
9261     MapperCGF.EmitBlock(ToElseBB);
9262     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9263         LeftToFrom,
9264         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9265     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9266     // In case of from, clear OMP_MAP_TO.
9267     MapperCGF.EmitBlock(FromBB);
9268     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9269         MemberMapType,
9270         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9271     // In case of tofrom, do nothing.
9272     MapperCGF.EmitBlock(EndBB);
9273     LastBB = EndBB;
9274     llvm::PHINode *CurMapType =
9275         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9276     CurMapType->addIncoming(AllocMapType, AllocBB);
9277     CurMapType->addIncoming(ToMapType, ToBB);
9278     CurMapType->addIncoming(FromMapType, FromBB);
9279     CurMapType->addIncoming(MemberMapType, ToElseBB);
9280 
9281     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9282                                      CurSizeArg, CurMapType};
9283     if (Info.Mappers[I]) {
9284       // Call the corresponding mapper function.
9285       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9286           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9287       assert(MapperFunc && "Expect a valid mapper function is available.");
9288       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9289     } else {
9290       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9291       // data structure.
9292       MapperCGF.EmitRuntimeCall(
9293           OMPBuilder.getOrCreateRuntimeFunction(
9294               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9295           OffloadingArgs);
9296     }
9297   }
9298 
9299   // Update the pointer to point to the next element that needs to be mapped,
9300   // and check whether we have mapped all elements.
9301   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9302       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9303   PtrPHI->addIncoming(PtrNext, LastBB);
9304   llvm::Value *IsDone =
9305       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9306   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9307   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9308 
9309   MapperCGF.EmitBlock(ExitBB);
9310   // Emit array deletion if this is an array section and \p MapType indicates
9311   // that deletion is required.
9312   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9313                              ElementSize, DoneBB, /*IsInit=*/false);
9314 
9315   // Emit the function exit block.
9316   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9317   MapperCGF.FinishFunction();
9318   UDMMap.try_emplace(D, Fn);
9319   if (CGF) {
9320     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9321     Decls.second.push_back(D);
9322   }
9323 }
9324 
9325 /// Emit the array initialization or deletion portion for user-defined mapper
9326 /// code generation. First, it evaluates whether an array section is mapped and
9327 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9328 /// true, and \a MapType indicates to not delete this array, array
9329 /// initialization code is generated. If \a IsInit is false, and \a MapType
9330 /// indicates to not this array, array deletion code is generated.
9331 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9332     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9333     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9334     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9335   StringRef Prefix = IsInit ? ".init" : ".del";
9336 
9337   // Evaluate if this is an array section.
9338   llvm::BasicBlock *IsDeleteBB =
9339       MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
9340   llvm::BasicBlock *BodyBB =
9341       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9342   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9343       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9344   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9345 
9346   // Evaluate if we are going to delete this section.
9347   MapperCGF.EmitBlock(IsDeleteBB);
9348   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9349       MapType,
9350       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9351   llvm::Value *DeleteCond;
9352   if (IsInit) {
9353     DeleteCond = MapperCGF.Builder.CreateIsNull(
9354         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9355   } else {
9356     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9357         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9358   }
9359   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9360 
9361   MapperCGF.EmitBlock(BodyBB);
9362   // Get the array size by multiplying element size and element number (i.e., \p
9363   // Size).
9364   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9365       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9366   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9367   // memory allocation/deletion purpose only.
9368   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9369       MapType,
9370       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9371                                    MappableExprsHandler::OMP_MAP_FROM)));
9372   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9373   // data structure.
9374   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9375   MapperCGF.EmitRuntimeCall(
9376       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9377                                             OMPRTL___tgt_push_mapper_component),
9378       OffloadingArgs);
9379 }
9380 
9381 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9382     const OMPDeclareMapperDecl *D) {
9383   auto I = UDMMap.find(D);
9384   if (I != UDMMap.end())
9385     return I->second;
9386   emitUserDefinedMapper(D);
9387   return UDMMap.lookup(D);
9388 }
9389 
9390 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9391     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9392     llvm::Value *DeviceID,
9393     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9394                                      const OMPLoopDirective &D)>
9395         SizeEmitter) {
9396   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9397   const OMPExecutableDirective *TD = &D;
9398   // Get nested teams distribute kind directive, if any.
9399   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9400     TD = getNestedDistributeDirective(CGM.getContext(), D);
9401   if (!TD)
9402     return;
9403   const auto *LD = cast<OMPLoopDirective>(TD);
9404   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9405                                                      PrePostActionTy &) {
9406     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9407       llvm::Value *Args[] = {DeviceID, NumIterations};
9408       CGF.EmitRuntimeCall(
9409           OMPBuilder.getOrCreateRuntimeFunction(
9410               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
9411           Args);
9412     }
9413   };
9414   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9415 }
9416 
/// Emit the host-side code that launches the target region of directive \a D.
///
/// When \a OutlinedFnID is non-null, the offloading arrays are filled from the
/// captured variables and map clauses, one of the __tgt_target*_mapper runtime
/// entry points is called, and \a OutlinedFn is called on the host if that
/// runtime call returns a non-zero value. When \a OutlinedFnID is null, only
/// the host call to \a OutlinedFn is emitted.
///
/// \param CGF          Function in which the code is emitted; nothing is
///                     emitted if it has no insertion point.
/// \param D            The target directive being lowered.
/// \param OutlinedFn   Host version of the target region; must be non-null.
/// \param OutlinedFnID Identifier of the target region passed to the runtime,
///                     or null if offloading is not supported.
/// \param IfCond       Optional 'if' clause condition; when present it selects
///                     between the offloading path and the host-only path.
/// \param Device       Device clause expression together with its modifier.
///                     With the 'ancestor' modifier the region is executed on
///                     the host.
/// \param SizeEmitter  Callback forwarded to emitTargetNumIterationsCall to
///                     compute the loop trip count.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // With a 'depend' or 'nowait' clause the region is emitted through
  // EmitOMPTargetTaskBasedDirective (see TargetThenGen/TargetElseGen below).
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Materialize the values of the captured variables at the current point.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled in by TargetThenGen and read by
  // ThenGen; both lambdas capture them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        // NOTE(review): the captured variables are regenerated here,
        // presumably because this code is emitted inside the task-based
        // wrapper rather than at the original insertion point -- confirm.
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: pass the OMP_DEVICEID_UNDEF constant.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    // A 'nowait' clause selects the *_nowait_mapper runtime entry points.
    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      // No teams information: use the non-teams entry point, which takes the
      // same arguments minus the team and thread counts.
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    // A non-zero return value is treated as an offloading failure.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    // Failure path: call the host version of the region.
    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Collect the map information for every capture, emit the offloading
  // arrays, publish them through InputInfo/MapTypesArray and finally run
  // ThenGen, either directly or wrapped in a task.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk the captures, the fields of the captured record and the captured
    // values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        // Record the capture as handled; a 'this' capture is recorded as
        // nullptr.
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurInfo.BasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, /*NotTargetParams=*/true,
                              MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info);
    bool HasDependClauses = D.hasClausesOfKind<OMPDependClause>();
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info.MappersArray, Info,
                                 {/*ForEndTask=*/false, HasDependClauses});
    // Publish the emitted arrays so that ThenGen can pass them to the runtime.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen, wrapped in a task when required. The
  // InputInfo passed to the task wrapper here is default-constructed.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9712 
9713 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9714                                                     StringRef ParentName) {
9715   if (!S)
9716     return;
9717 
9718   // Codegen OMP target directives that offload compute to the device.
9719   bool RequiresDeviceCodegen =
9720       isa<OMPExecutableDirective>(S) &&
9721       isOpenMPTargetExecutionDirective(
9722           cast<OMPExecutableDirective>(S)->getDirectiveKind());
9723 
9724   if (RequiresDeviceCodegen) {
9725     const auto &E = *cast<OMPExecutableDirective>(S);
9726     unsigned DeviceID;
9727     unsigned FileID;
9728     unsigned Line;
9729     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
9730                              FileID, Line);
9731 
9732     // Is this a target region that should not be emitted as an entry point? If
9733     // so just signal we are done with this target region.
9734     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
9735                                                             ParentName, Line))
9736       return;
9737 
9738     switch (E.getDirectiveKind()) {
9739     case OMPD_target:
9740       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9741                                                    cast<OMPTargetDirective>(E));
9742       break;
9743     case OMPD_target_parallel:
9744       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9745           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9746       break;
9747     case OMPD_target_teams:
9748       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9749           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9750       break;
9751     case OMPD_target_teams_distribute:
9752       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9753           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9754       break;
9755     case OMPD_target_teams_distribute_simd:
9756       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9757           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9758       break;
9759     case OMPD_target_parallel_for:
9760       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9761           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9762       break;
9763     case OMPD_target_parallel_for_simd:
9764       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9765           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9766       break;
9767     case OMPD_target_simd:
9768       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9769           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9770       break;
9771     case OMPD_target_teams_distribute_parallel_for:
9772       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9773           CGM, ParentName,
9774           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9775       break;
9776     case OMPD_target_teams_distribute_parallel_for_simd:
9777       CodeGenFunction::
9778           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9779               CGM, ParentName,
9780               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9781       break;
9782     case OMPD_parallel:
9783     case OMPD_for:
9784     case OMPD_parallel_for:
9785     case OMPD_parallel_master:
9786     case OMPD_parallel_sections:
9787     case OMPD_for_simd:
9788     case OMPD_parallel_for_simd:
9789     case OMPD_cancel:
9790     case OMPD_cancellation_point:
9791     case OMPD_ordered:
9792     case OMPD_threadprivate:
9793     case OMPD_allocate:
9794     case OMPD_task:
9795     case OMPD_simd:
9796     case OMPD_sections:
9797     case OMPD_section:
9798     case OMPD_single:
9799     case OMPD_master:
9800     case OMPD_critical:
9801     case OMPD_taskyield:
9802     case OMPD_barrier:
9803     case OMPD_taskwait:
9804     case OMPD_taskgroup:
9805     case OMPD_atomic:
9806     case OMPD_flush:
9807     case OMPD_depobj:
9808     case OMPD_scan:
9809     case OMPD_teams:
9810     case OMPD_target_data:
9811     case OMPD_target_exit_data:
9812     case OMPD_target_enter_data:
9813     case OMPD_distribute:
9814     case OMPD_distribute_simd:
9815     case OMPD_distribute_parallel_for:
9816     case OMPD_distribute_parallel_for_simd:
9817     case OMPD_teams_distribute:
9818     case OMPD_teams_distribute_simd:
9819     case OMPD_teams_distribute_parallel_for:
9820     case OMPD_teams_distribute_parallel_for_simd:
9821     case OMPD_target_update:
9822     case OMPD_declare_simd:
9823     case OMPD_declare_variant:
9824     case OMPD_begin_declare_variant:
9825     case OMPD_end_declare_variant:
9826     case OMPD_declare_target:
9827     case OMPD_end_declare_target:
9828     case OMPD_declare_reduction:
9829     case OMPD_declare_mapper:
9830     case OMPD_taskloop:
9831     case OMPD_taskloop_simd:
9832     case OMPD_master_taskloop:
9833     case OMPD_master_taskloop_simd:
9834     case OMPD_parallel_master_taskloop:
9835     case OMPD_parallel_master_taskloop_simd:
9836     case OMPD_requires:
9837     case OMPD_unknown:
9838     default:
9839       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9840     }
9841     return;
9842   }
9843 
9844   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9845     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9846       return;
9847 
9848     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
9849     return;
9850   }
9851 
9852   // If this is a lambda function, look into its body.
9853   if (const auto *L = dyn_cast<LambdaExpr>(S))
9854     S = L->getBody();
9855 
9856   // Keep looking for target regions recursively.
9857   for (const Stmt *II : S->children())
9858     scanForTargetRegionsFunctions(II, ParentName);
9859 }
9860 
9861 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9862   // If emitting code for the host, we do not process FD here. Instead we do
9863   // the normal code generation.
9864   if (!CGM.getLangOpts().OpenMPIsDevice) {
9865     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9866       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9867           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9868       // Do not emit device_type(nohost) functions for the host.
9869       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9870         return true;
9871     }
9872     return false;
9873   }
9874 
9875   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9876   // Try to detect target regions in the function.
9877   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9878     StringRef Name = CGM.getMangledName(GD);
9879     scanForTargetRegionsFunctions(FD->getBody(), Name);
9880     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9881         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9882     // Do not emit device_type(nohost) functions for the host.
9883     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9884       return true;
9885   }
9886 
9887   // Do not to emit function if it is not marked as declare target.
9888   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9889          AlreadyEmittedTargetDecls.count(VD) == 0;
9890 }
9891 
9892 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9893   if (!CGM.getLangOpts().OpenMPIsDevice)
9894     return false;
9895 
9896   // Check if there are Ctors/Dtors in this declaration and look for target
9897   // regions in it. We use the complete variant to produce the kernel name
9898   // mangling.
9899   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9900   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9901     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9902       StringRef ParentName =
9903           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9904       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9905     }
9906     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9907       StringRef ParentName =
9908           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9909       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9910     }
9911   }
9912 
9913   // Do not to emit variable if it is not marked as declare target.
9914   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9915       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9916           cast<VarDecl>(GD.getDecl()));
9917   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9918       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9919        HasRequiresUnifiedSharedMemory)) {
9920     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9921     return true;
9922   }
9923   return false;
9924 }
9925 
9926 llvm::Constant *
9927 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9928                                                 const VarDecl *VD) {
9929   assert(VD->getType().isConstant(CGM.getContext()) &&
9930          "Expected constant variable.");
9931   StringRef VarName;
9932   llvm::Constant *Addr;
9933   llvm::GlobalValue::LinkageTypes Linkage;
9934   QualType Ty = VD->getType();
9935   SmallString<128> Buffer;
9936   {
9937     unsigned DeviceID;
9938     unsigned FileID;
9939     unsigned Line;
9940     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9941                              FileID, Line);
9942     llvm::raw_svector_ostream OS(Buffer);
9943     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9944        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9945     VarName = OS.str();
9946   }
9947   Linkage = llvm::GlobalValue::InternalLinkage;
9948   Addr =
9949       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9950                                   getDefaultFirstprivateAddressSpace());
9951   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9952   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9953   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9954   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9955       VarName, Addr, VarSize,
9956       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9957   return Addr;
9958 }
9959 
9960 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9961                                                    llvm::Constant *Addr) {
9962   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9963       !CGM.getLangOpts().OpenMPIsDevice)
9964     return;
9965   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9966       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9967   if (!Res) {
9968     if (CGM.getLangOpts().OpenMPIsDevice) {
9969       // Register non-target variables being emitted in device code (debug info
9970       // may cause this).
9971       StringRef VarName = CGM.getMangledName(VD);
9972       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9973     }
9974     return;
9975   }
9976   // Register declare target variables.
9977   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9978   StringRef VarName;
9979   CharUnits VarSize;
9980   llvm::GlobalValue::LinkageTypes Linkage;
9981 
9982   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9983       !HasRequiresUnifiedSharedMemory) {
9984     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9985     VarName = CGM.getMangledName(VD);
9986     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9987       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9988       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9989     } else {
9990       VarSize = CharUnits::Zero();
9991     }
9992     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9993     // Temp solution to prevent optimizations of the internal variables.
9994     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9995       std::string RefName = getName({VarName, "ref"});
9996       if (!CGM.GetGlobalValue(RefName)) {
9997         llvm::Constant *AddrRef =
9998             getOrCreateInternalVariable(Addr->getType(), RefName);
9999         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10000         GVAddrRef->setConstant(/*Val=*/true);
10001         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10002         GVAddrRef->setInitializer(Addr);
10003         CGM.addCompilerUsedGlobal(GVAddrRef);
10004       }
10005     }
10006   } else {
10007     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10008             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10009              HasRequiresUnifiedSharedMemory)) &&
10010            "Declare target attribute must link or to with unified memory.");
10011     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10012       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10013     else
10014       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10015 
10016     if (CGM.getLangOpts().OpenMPIsDevice) {
10017       VarName = Addr->getName();
10018       Addr = nullptr;
10019     } else {
10020       VarName = getAddrOfDeclareTargetVar(VD).getName();
10021       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10022     }
10023     VarSize = CGM.getPointerSize();
10024     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10025   }
10026 
10027   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10028       VarName, Addr, VarSize, Flags, Linkage);
10029 }
10030 
10031 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10032   if (isa<FunctionDecl>(GD.getDecl()) ||
10033       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10034     return emitTargetFunctions(GD);
10035 
10036   return emitTargetGlobalVariable(GD);
10037 }
10038 
10039 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10040   for (const VarDecl *VD : DeferredGlobalVariables) {
10041     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10042         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10043     if (!Res)
10044       continue;
10045     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10046         !HasRequiresUnifiedSharedMemory) {
10047       CGM.EmitGlobal(VD);
10048     } else {
10049       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10050               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10051                HasRequiresUnifiedSharedMemory)) &&
10052              "Expected link clause or to clause with unified memory.");
10053       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10054     }
10055   }
10056 }
10057 
10058 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10059     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10060   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10061          " Expected target-based directive.");
10062 }
10063 
10064 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10065   for (const OMPClause *Clause : D->clauselists()) {
10066     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10067       HasRequiresUnifiedSharedMemory = true;
10068     } else if (const auto *AC =
10069                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10070       switch (AC->getAtomicDefaultMemOrderKind()) {
10071       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10072         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10073         break;
10074       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10075         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10076         break;
10077       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10078         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10079         break;
10080       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10081         break;
10082       }
10083     }
10084   }
10085 }
10086 
10087 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10088   return RequiresAtomicOrdering;
10089 }
10090 
10091 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10092                                                        LangAS &AS) {
10093   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10094     return false;
10095   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10096   switch(A->getAllocatorType()) {
10097   case OMPAllocateDeclAttr::OMPNullMemAlloc:
10098   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10099   // Not supported, fallback to the default mem space.
10100   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10101   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10102   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10103   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10104   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10105   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10106   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10107     AS = LangAS::Default;
10108     return true;
10109   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10110     llvm_unreachable("Expected predefined allocator for the variables with the "
10111                      "static storage.");
10112   }
10113   return false;
10114 }
10115 
10116 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10117   return HasRequiresUnifiedSharedMemory;
10118 }
10119 
10120 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10121     CodeGenModule &CGM)
10122     : CGM(CGM) {
10123   if (CGM.getLangOpts().OpenMPIsDevice) {
10124     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10125     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10126   }
10127 }
10128 
10129 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10130   if (CGM.getLangOpts().OpenMPIsDevice)
10131     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10132 }
10133 
10134 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10135   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10136     return true;
10137 
10138   const auto *D = cast<FunctionDecl>(GD.getDecl());
10139   // Do not to emit function if it is marked as declare target as it was already
10140   // emitted.
10141   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10142     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10143       if (auto *F = dyn_cast_or_null<llvm::Function>(
10144               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10145         return !F->isDeclaration();
10146       return false;
10147     }
10148     return true;
10149   }
10150 
10151   return !AlreadyEmittedTargetDecls.insert(D).second;
10152 }
10153 
10154 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10155   // If we don't have entries or if we are emitting code for the device, we
10156   // don't need to do anything.
10157   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10158       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10159       (OffloadEntriesInfoManager.empty() &&
10160        !HasEmittedDeclareTargetRegion &&
10161        !HasEmittedTargetRegion))
10162     return nullptr;
10163 
10164   // Create and register the function that handles the requires directives.
10165   ASTContext &C = CGM.getContext();
10166 
10167   llvm::Function *RequiresRegFn;
10168   {
10169     CodeGenFunction CGF(CGM);
10170     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10171     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10172     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10173     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10174     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10175     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10176     // TODO: check for other requires clauses.
10177     // The requires directive takes effect only when a target region is
10178     // present in the compilation unit. Otherwise it is ignored and not
10179     // passed to the runtime. This avoids the runtime from throwing an error
10180     // for mismatching requires clauses across compilation units that don't
10181     // contain at least 1 target region.
10182     assert((HasEmittedTargetRegion ||
10183             HasEmittedDeclareTargetRegion ||
10184             !OffloadEntriesInfoManager.empty()) &&
10185            "Target or declare target region expected.");
10186     if (HasRequiresUnifiedSharedMemory)
10187       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10188     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10189                             CGM.getModule(), OMPRTL___tgt_register_requires),
10190                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10191     CGF.FinishFunction();
10192   }
10193   return RequiresRegFn;
10194 }
10195 
10196 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10197                                     const OMPExecutableDirective &D,
10198                                     SourceLocation Loc,
10199                                     llvm::Function *OutlinedFn,
10200                                     ArrayRef<llvm::Value *> CapturedVars) {
10201   if (!CGF.HaveInsertPoint())
10202     return;
10203 
10204   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10205   CodeGenFunction::RunCleanupsScope Scope(CGF);
10206 
10207   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10208   llvm::Value *Args[] = {
10209       RTLoc,
10210       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10211       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10212   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10213   RealArgs.append(std::begin(Args), std::end(Args));
10214   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10215 
10216   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10217       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10218   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10219 }
10220 
10221 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10222                                          const Expr *NumTeams,
10223                                          const Expr *ThreadLimit,
10224                                          SourceLocation Loc) {
10225   if (!CGF.HaveInsertPoint())
10226     return;
10227 
10228   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10229 
10230   llvm::Value *NumTeamsVal =
10231       NumTeams
10232           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10233                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10234           : CGF.Builder.getInt32(0);
10235 
10236   llvm::Value *ThreadLimitVal =
10237       ThreadLimit
10238           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10239                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10240           : CGF.Builder.getInt32(0);
10241 
10242   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10243   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10244                                      ThreadLimitVal};
10245   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10246                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10247                       PushNumTeamsArgs);
10248 }
10249 
10250 void CGOpenMPRuntime::emitTargetDataCalls(
10251     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10252     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
10253   if (!CGF.HaveInsertPoint())
10254     return;
10255 
10256   // Action used to replace the default codegen action and turn privatization
10257   // off.
10258   PrePostActionTy NoPrivAction;
10259 
10260   // Generate the code for the opening of the data environment. Capture all the
10261   // arguments of the runtime call by reference because they are used in the
10262   // closing of the region.
10263   auto &&BeginThenGen = [this, &D, Device, &Info,
10264                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
10265     // Fill up the arrays with all the mapped variables.
10266     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10267 
10268     // Get map clause information.
10269     MappableExprsHandler MEHandler(D, CGF);
10270     MEHandler.generateAllInfo(CombinedInfo);
10271 
10272     // Fill up the arrays and create the arguments.
10273     emitOffloadingArrays(CGF, CombinedInfo, Info);
10274 
10275     llvm::Value *BasePointersArrayArg = nullptr;
10276     llvm::Value *PointersArrayArg = nullptr;
10277     llvm::Value *SizesArrayArg = nullptr;
10278     llvm::Value *MapTypesArrayArg = nullptr;
10279     llvm::Value *MappersArrayArg = nullptr;
10280     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10281                                  SizesArrayArg, MapTypesArrayArg,
10282                                  MappersArrayArg, Info);
10283 
10284     // Emit device ID if any.
10285     llvm::Value *DeviceID = nullptr;
10286     if (Device) {
10287       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10288                                            CGF.Int64Ty, /*isSigned=*/true);
10289     } else {
10290       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10291     }
10292 
10293     // Emit the number of elements in the offloading arrays.
10294     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10295 
10296     llvm::Value *OffloadingArgs[] = {
10297         DeviceID,      PointerNum,       BasePointersArrayArg, PointersArrayArg,
10298         SizesArrayArg, MapTypesArrayArg, MappersArrayArg};
10299     CGF.EmitRuntimeCall(
10300         OMPBuilder.getOrCreateRuntimeFunction(
10301             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
10302         OffloadingArgs);
10303 
10304     // If device pointer privatization is required, emit the body of the region
10305     // here. It will have to be duplicated: with and without privatization.
10306     if (!Info.CaptureDeviceAddrMap.empty())
10307       CodeGen(CGF);
10308   };
10309 
10310   // Generate code for the closing of the data region.
10311   auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
10312                                             PrePostActionTy &) {
10313     assert(Info.isValid() && "Invalid data environment closing arguments.");
10314 
10315     llvm::Value *BasePointersArrayArg = nullptr;
10316     llvm::Value *PointersArrayArg = nullptr;
10317     llvm::Value *SizesArrayArg = nullptr;
10318     llvm::Value *MapTypesArrayArg = nullptr;
10319     llvm::Value *MappersArrayArg = nullptr;
10320     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10321                                  SizesArrayArg, MapTypesArrayArg,
10322                                  MappersArrayArg, Info,
10323                                  {/*ForEndCall=*/true, /*IsTask=*/false});
10324 
10325     // Emit device ID if any.
10326     llvm::Value *DeviceID = nullptr;
10327     if (Device) {
10328       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10329                                            CGF.Int64Ty, /*isSigned=*/true);
10330     } else {
10331       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10332     }
10333 
10334     // Emit the number of elements in the offloading arrays.
10335     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10336 
10337     llvm::Value *OffloadingArgs[] = {
10338         DeviceID,      PointerNum,       BasePointersArrayArg, PointersArrayArg,
10339         SizesArrayArg, MapTypesArrayArg, MappersArrayArg};
10340     CGF.EmitRuntimeCall(
10341         OMPBuilder.getOrCreateRuntimeFunction(
10342             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
10343         OffloadingArgs);
10344   };
10345 
10346   // If we need device pointer privatization, we need to emit the body of the
10347   // region with no privatization in the 'else' branch of the conditional.
10348   // Otherwise, we don't have to do anything.
10349   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
10350                                                          PrePostActionTy &) {
10351     if (!Info.CaptureDeviceAddrMap.empty()) {
10352       CodeGen.setAction(NoPrivAction);
10353       CodeGen(CGF);
10354     }
10355   };
10356 
10357   // We don't have to do anything to close the region if the if clause evaluates
10358   // to false.
10359   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
10360 
10361   if (IfCond) {
10362     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
10363   } else {
10364     RegionCodeGenTy RCG(BeginThenGen);
10365     RCG(CGF);
10366   }
10367 
10368   // If we don't require privatization of device pointers, we emit the body in
10369   // between the runtime calls. This avoids duplicating the body code.
10370   if (Info.CaptureDeviceAddrMap.empty()) {
10371     CodeGen.setAction(NoPrivAction);
10372     CodeGen(CGF);
10373   }
10374 
10375   if (IfCond) {
10376     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
10377   } else {
10378     RegionCodeGenTy RCG(EndThenGen);
10379     RCG(CGF);
10380   }
10381 }
10382 
10383 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10384     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10385     const Expr *Device) {
10386   if (!CGF.HaveInsertPoint())
10387     return;
10388 
10389   assert((isa<OMPTargetEnterDataDirective>(D) ||
10390           isa<OMPTargetExitDataDirective>(D) ||
10391           isa<OMPTargetUpdateDirective>(D)) &&
10392          "Expecting either target enter, exit data, or update directives.");
10393 
10394   CodeGenFunction::OMPTargetDataInfo InputInfo;
10395   llvm::Value *MapTypesArray = nullptr;
10396   // Generate the code for the opening of the data environment.
10397   auto &&ThenGen = [this, &D, Device, &InputInfo,
10398                     &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10399     // Emit device ID if any.
10400     llvm::Value *DeviceID = nullptr;
10401     if (Device) {
10402       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10403                                            CGF.Int64Ty, /*isSigned=*/true);
10404     } else {
10405       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10406     }
10407 
10408     // Emit the number of elements in the offloading arrays.
10409     llvm::Constant *PointerNum =
10410         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10411 
10412     llvm::Value *OffloadingArgs[] = {DeviceID,
10413                                      PointerNum,
10414                                      InputInfo.BasePointersArray.getPointer(),
10415                                      InputInfo.PointersArray.getPointer(),
10416                                      InputInfo.SizesArray.getPointer(),
10417                                      MapTypesArray,
10418                                      InputInfo.MappersArray.getPointer()};
10419 
10420     // Select the right runtime function call for each standalone
10421     // directive.
10422     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10423     RuntimeFunction RTLFn;
10424     switch (D.getDirectiveKind()) {
10425     case OMPD_target_enter_data:
10426       RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10427                         : OMPRTL___tgt_target_data_begin_mapper;
10428       break;
10429     case OMPD_target_exit_data:
10430       RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10431                         : OMPRTL___tgt_target_data_end_mapper;
10432       break;
10433     case OMPD_target_update:
10434       RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10435                         : OMPRTL___tgt_target_data_update_mapper;
10436       break;
10437     case OMPD_parallel:
10438     case OMPD_for:
10439     case OMPD_parallel_for:
10440     case OMPD_parallel_master:
10441     case OMPD_parallel_sections:
10442     case OMPD_for_simd:
10443     case OMPD_parallel_for_simd:
10444     case OMPD_cancel:
10445     case OMPD_cancellation_point:
10446     case OMPD_ordered:
10447     case OMPD_threadprivate:
10448     case OMPD_allocate:
10449     case OMPD_task:
10450     case OMPD_simd:
10451     case OMPD_sections:
10452     case OMPD_section:
10453     case OMPD_single:
10454     case OMPD_master:
10455     case OMPD_critical:
10456     case OMPD_taskyield:
10457     case OMPD_barrier:
10458     case OMPD_taskwait:
10459     case OMPD_taskgroup:
10460     case OMPD_atomic:
10461     case OMPD_flush:
10462     case OMPD_depobj:
10463     case OMPD_scan:
10464     case OMPD_teams:
10465     case OMPD_target_data:
10466     case OMPD_distribute:
10467     case OMPD_distribute_simd:
10468     case OMPD_distribute_parallel_for:
10469     case OMPD_distribute_parallel_for_simd:
10470     case OMPD_teams_distribute:
10471     case OMPD_teams_distribute_simd:
10472     case OMPD_teams_distribute_parallel_for:
10473     case OMPD_teams_distribute_parallel_for_simd:
10474     case OMPD_declare_simd:
10475     case OMPD_declare_variant:
10476     case OMPD_begin_declare_variant:
10477     case OMPD_end_declare_variant:
10478     case OMPD_declare_target:
10479     case OMPD_end_declare_target:
10480     case OMPD_declare_reduction:
10481     case OMPD_declare_mapper:
10482     case OMPD_taskloop:
10483     case OMPD_taskloop_simd:
10484     case OMPD_master_taskloop:
10485     case OMPD_master_taskloop_simd:
10486     case OMPD_parallel_master_taskloop:
10487     case OMPD_parallel_master_taskloop_simd:
10488     case OMPD_target:
10489     case OMPD_target_simd:
10490     case OMPD_target_teams_distribute:
10491     case OMPD_target_teams_distribute_simd:
10492     case OMPD_target_teams_distribute_parallel_for:
10493     case OMPD_target_teams_distribute_parallel_for_simd:
10494     case OMPD_target_teams:
10495     case OMPD_target_parallel:
10496     case OMPD_target_parallel_for:
10497     case OMPD_target_parallel_for_simd:
10498     case OMPD_requires:
10499     case OMPD_unknown:
10500     default:
10501       llvm_unreachable("Unexpected standalone target data directive.");
10502       break;
10503     }
10504     CGF.EmitRuntimeCall(
10505         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10506         OffloadingArgs);
10507   };
10508 
10509   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
10510                              CodeGenFunction &CGF, PrePostActionTy &) {
10511     // Fill up the arrays with all the mapped variables.
10512     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10513 
10514     // Get map clause information.
10515     MappableExprsHandler MEHandler(D, CGF);
10516     MEHandler.generateAllInfo(CombinedInfo);
10517 
10518     TargetDataInfo Info;
10519     // Fill up the arrays and create the arguments.
10520     emitOffloadingArrays(CGF, CombinedInfo, Info);
10521     bool HasDependClauses = D.hasClausesOfKind<OMPDependClause>();
10522     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
10523                                  Info.PointersArray, Info.SizesArray,
10524                                  Info.MapTypesArray, Info.MappersArray, Info,
10525                                  {/*ForEndTask=*/false, HasDependClauses});
10526     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10527     InputInfo.BasePointersArray =
10528         Address(Info.BasePointersArray, CGM.getPointerAlign());
10529     InputInfo.PointersArray =
10530         Address(Info.PointersArray, CGM.getPointerAlign());
10531     InputInfo.SizesArray =
10532         Address(Info.SizesArray, CGM.getPointerAlign());
10533     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
10534     MapTypesArray = Info.MapTypesArray;
10535     if (HasDependClauses)
10536       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10537     else
10538       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10539   };
10540 
10541   if (IfCond) {
10542     emitIfClause(CGF, IfCond, TargetThenGen,
10543                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
10544   } else {
10545     RegionCodeGenTy ThenRCG(TargetThenGen);
10546     ThenRCG(CGF);
10547   }
10548 }
10549 
10550 namespace {
10551   /// Kind of parameter in a function with 'declare simd' directive.
10552   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
10553   /// Attribute set of the parameter.
10554   struct ParamAttrTy {
10555     ParamKindTy Kind = Vector;
10556     llvm::APSInt StrideOrArg;
10557     llvm::APSInt Alignment;
10558   };
10559 } // namespace
10560 
10561 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10562                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10563   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10564   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10565   // of that clause. The VLEN value must be power of 2.
10566   // In other case the notion of the function`s "characteristic data type" (CDT)
10567   // is used to compute the vector length.
10568   // CDT is defined in the following order:
10569   //   a) For non-void function, the CDT is the return type.
10570   //   b) If the function has any non-uniform, non-linear parameters, then the
10571   //   CDT is the type of the first such parameter.
10572   //   c) If the CDT determined by a) or b) above is struct, union, or class
10573   //   type which is pass-by-value (except for the type that maps to the
10574   //   built-in complex data type), the characteristic data type is int.
10575   //   d) If none of the above three cases is applicable, the CDT is int.
10576   // The VLEN is then determined based on the CDT and the size of vector
10577   // register of that ISA for which current vector version is generated. The
10578   // VLEN is computed using the formula below:
10579   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10580   // where vector register size specified in section 3.2.1 Registers and the
10581   // Stack Frame of original AMD64 ABI document.
10582   QualType RetType = FD->getReturnType();
10583   if (RetType.isNull())
10584     return 0;
10585   ASTContext &C = FD->getASTContext();
10586   QualType CDT;
10587   if (!RetType.isNull() && !RetType->isVoidType()) {
10588     CDT = RetType;
10589   } else {
10590     unsigned Offset = 0;
10591     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10592       if (ParamAttrs[Offset].Kind == Vector)
10593         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10594       ++Offset;
10595     }
10596     if (CDT.isNull()) {
10597       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10598         if (ParamAttrs[I + Offset].Kind == Vector) {
10599           CDT = FD->getParamDecl(I)->getType();
10600           break;
10601         }
10602       }
10603     }
10604   }
10605   if (CDT.isNull())
10606     CDT = C.IntTy;
10607   CDT = CDT->getCanonicalTypeUnqualified();
10608   if (CDT->isRecordType() || CDT->isUnionType())
10609     CDT = C.IntTy;
10610   return C.getTypeSize(CDT);
10611 }
10612 
10613 static void
10614 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10615                            const llvm::APSInt &VLENVal,
10616                            ArrayRef<ParamAttrTy> ParamAttrs,
10617                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10618   struct ISADataTy {
10619     char ISA;
10620     unsigned VecRegSize;
10621   };
10622   ISADataTy ISAData[] = {
10623       {
10624           'b', 128
10625       }, // SSE
10626       {
10627           'c', 256
10628       }, // AVX
10629       {
10630           'd', 256
10631       }, // AVX2
10632       {
10633           'e', 512
10634       }, // AVX512
10635   };
10636   llvm::SmallVector<char, 2> Masked;
10637   switch (State) {
10638   case OMPDeclareSimdDeclAttr::BS_Undefined:
10639     Masked.push_back('N');
10640     Masked.push_back('M');
10641     break;
10642   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10643     Masked.push_back('N');
10644     break;
10645   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10646     Masked.push_back('M');
10647     break;
10648   }
10649   for (char Mask : Masked) {
10650     for (const ISADataTy &Data : ISAData) {
10651       SmallString<256> Buffer;
10652       llvm::raw_svector_ostream Out(Buffer);
10653       Out << "_ZGV" << Data.ISA << Mask;
10654       if (!VLENVal) {
10655         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10656         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10657         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10658       } else {
10659         Out << VLENVal;
10660       }
10661       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10662         switch (ParamAttr.Kind){
10663         case LinearWithVarStride:
10664           Out << 's' << ParamAttr.StrideOrArg;
10665           break;
10666         case Linear:
10667           Out << 'l';
10668           if (ParamAttr.StrideOrArg != 1)
10669             Out << ParamAttr.StrideOrArg;
10670           break;
10671         case Uniform:
10672           Out << 'u';
10673           break;
10674         case Vector:
10675           Out << 'v';
10676           break;
10677         }
10678         if (!!ParamAttr.Alignment)
10679           Out << 'a' << ParamAttr.Alignment;
10680       }
10681       Out << '_' << Fn->getName();
10682       Fn->addFnAttr(Out.str());
10683     }
10684   }
10685 }
10686 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10692 
10693 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10694 ///
10695 /// TODO: Need to implement the behavior for reference marked with a
10696 /// var or no linear modifiers (1.b in the section). For this, we
10697 /// need to extend ParamKindTy to support the linear modifiers.
10698 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10699   QT = QT.getCanonicalType();
10700 
10701   if (QT->isVoidType())
10702     return false;
10703 
10704   if (Kind == ParamKindTy::Uniform)
10705     return false;
10706 
10707   if (Kind == ParamKindTy::Linear)
10708     return false;
10709 
10710   // TODO: Handle linear references with modifiers
10711 
10712   if (Kind == ParamKindTy::LinearWithVarStride)
10713     return false;
10714 
10715   return true;
10716 }
10717 
10718 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10719 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10720   QT = QT.getCanonicalType();
10721   unsigned Size = C.getTypeSize(QT);
10722 
10723   // Only scalars and complex within 16 bytes wide set PVB to true.
10724   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10725     return false;
10726 
10727   if (QT->isFloatingType())
10728     return true;
10729 
10730   if (QT->isIntegerType())
10731     return true;
10732 
10733   if (QT->isPointerType())
10734     return true;
10735 
10736   // TODO: Add support for complex types (section 3.1.2, item 2).
10737 
10738   return false;
10739 }
10740 
10741 /// Computes the lane size (LS) of a return type or of an input parameter,
10742 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10743 /// TODO: Add support for references, section 3.2.1, item 1.
10744 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10745   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10746     QualType PTy = QT.getCanonicalType()->getPointeeType();
10747     if (getAArch64PBV(PTy, C))
10748       return C.getTypeSize(PTy);
10749   }
10750   if (getAArch64PBV(QT, C))
10751     return C.getTypeSize(QT);
10752 
10753   return C.getTypeSize(C.getUIntPtrType());
10754 }
10755 
10756 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10757 // signature of the scalar function, as defined in 3.2.2 of the
10758 // AAVFABI.
10759 static std::tuple<unsigned, unsigned, bool>
10760 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10761   QualType RetType = FD->getReturnType().getCanonicalType();
10762 
10763   ASTContext &C = FD->getASTContext();
10764 
10765   bool OutputBecomesInput = false;
10766 
10767   llvm::SmallVector<unsigned, 8> Sizes;
10768   if (!RetType->isVoidType()) {
10769     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10770     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10771       OutputBecomesInput = true;
10772   }
10773   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10774     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10775     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10776   }
10777 
10778   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10779   // The LS of a function parameter / return value can only be a power
10780   // of 2, starting from 8 bits, up to 128.
10781   assert(std::all_of(Sizes.begin(), Sizes.end(),
10782                      [](unsigned Size) {
10783                        return Size == 8 || Size == 16 || Size == 32 ||
10784                               Size == 64 || Size == 128;
10785                      }) &&
10786          "Invalid size");
10787 
10788   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10789                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10790                          OutputBecomesInput);
10791 }
10792 
10793 /// Mangle the parameter part of the vector function name according to
10794 /// their OpenMP classification. The mangling function is defined in
10795 /// section 3.5 of the AAVFABI.
10796 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10797   SmallString<256> Buffer;
10798   llvm::raw_svector_ostream Out(Buffer);
10799   for (const auto &ParamAttr : ParamAttrs) {
10800     switch (ParamAttr.Kind) {
10801     case LinearWithVarStride:
10802       Out << "ls" << ParamAttr.StrideOrArg;
10803       break;
10804     case Linear:
10805       Out << 'l';
10806       // Don't print the step value if it is not present or if it is
10807       // equal to 1.
10808       if (ParamAttr.StrideOrArg != 1)
10809         Out << ParamAttr.StrideOrArg;
10810       break;
10811     case Uniform:
10812       Out << 'u';
10813       break;
10814     case Vector:
10815       Out << 'v';
10816       break;
10817     }
10818 
10819     if (!!ParamAttr.Alignment)
10820       Out << 'a' << ParamAttr.Alignment;
10821   }
10822 
10823   return std::string(Out.str());
10824 }
10825 
10826 // Function used to add the attribute. The parameter `VLEN` is
10827 // templated to allow the use of "x" when targeting scalable functions
10828 // for SVE.
10829 template <typename T>
10830 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10831                                  char ISA, StringRef ParSeq,
10832                                  StringRef MangledName, bool OutputBecomesInput,
10833                                  llvm::Function *Fn) {
10834   SmallString<256> Buffer;
10835   llvm::raw_svector_ostream Out(Buffer);
10836   Out << Prefix << ISA << LMask << VLEN;
10837   if (OutputBecomesInput)
10838     Out << "v";
10839   Out << ParSeq << "_" << MangledName;
10840   Fn->addFnAttr(Out.str());
10841 }
10842 
10843 // Helper function to generate the Advanced SIMD names depending on
10844 // the value of the NDS when simdlen is not present.
10845 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10846                                       StringRef Prefix, char ISA,
10847                                       StringRef ParSeq, StringRef MangledName,
10848                                       bool OutputBecomesInput,
10849                                       llvm::Function *Fn) {
10850   switch (NDS) {
10851   case 8:
10852     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10853                          OutputBecomesInput, Fn);
10854     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10855                          OutputBecomesInput, Fn);
10856     break;
10857   case 16:
10858     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10859                          OutputBecomesInput, Fn);
10860     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10861                          OutputBecomesInput, Fn);
10862     break;
10863   case 32:
10864     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10865                          OutputBecomesInput, Fn);
10866     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10867                          OutputBecomesInput, Fn);
10868     break;
10869   case 64:
10870   case 128:
10871     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10872                          OutputBecomesInput, Fn);
10873     break;
10874   default:
10875     llvm_unreachable("Scalar type is too wide.");
10876   }
10877 }
10878 
10879 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10880 static void emitAArch64DeclareSimdFunction(
10881     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10882     ArrayRef<ParamAttrTy> ParamAttrs,
10883     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10884     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10885 
10886   // Get basic data for building the vector signature.
10887   const auto Data = getNDSWDS(FD, ParamAttrs);
10888   const unsigned NDS = std::get<0>(Data);
10889   const unsigned WDS = std::get<1>(Data);
10890   const bool OutputBecomesInput = std::get<2>(Data);
10891 
10892   // Check the values provided via `simdlen` by the user.
10893   // 1. A `simdlen(1)` doesn't produce vector signatures,
10894   if (UserVLEN == 1) {
10895     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10896         DiagnosticsEngine::Warning,
10897         "The clause simdlen(1) has no effect when targeting aarch64.");
10898     CGM.getDiags().Report(SLoc, DiagID);
10899     return;
10900   }
10901 
10902   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10903   // Advanced SIMD output.
10904   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10905     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10906         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10907                                     "power of 2 when targeting Advanced SIMD.");
10908     CGM.getDiags().Report(SLoc, DiagID);
10909     return;
10910   }
10911 
10912   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10913   // limits.
10914   if (ISA == 's' && UserVLEN != 0) {
10915     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10916       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10917           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10918                                       "lanes in the architectural constraints "
10919                                       "for SVE (min is 128-bit, max is "
10920                                       "2048-bit, by steps of 128-bit)");
10921       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10922       return;
10923     }
10924   }
10925 
10926   // Sort out parameter sequence.
10927   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10928   StringRef Prefix = "_ZGV";
10929   // Generate simdlen from user input (if any).
10930   if (UserVLEN) {
10931     if (ISA == 's') {
10932       // SVE generates only a masked function.
10933       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10934                            OutputBecomesInput, Fn);
10935     } else {
10936       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10937       // Advanced SIMD generates one or two functions, depending on
10938       // the `[not]inbranch` clause.
10939       switch (State) {
10940       case OMPDeclareSimdDeclAttr::BS_Undefined:
10941         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10942                              OutputBecomesInput, Fn);
10943         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10944                              OutputBecomesInput, Fn);
10945         break;
10946       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10947         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10948                              OutputBecomesInput, Fn);
10949         break;
10950       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10951         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10952                              OutputBecomesInput, Fn);
10953         break;
10954       }
10955     }
10956   } else {
10957     // If no user simdlen is provided, follow the AAVFABI rules for
10958     // generating the vector length.
10959     if (ISA == 's') {
10960       // SVE, section 3.4.1, item 1.
10961       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10962                            OutputBecomesInput, Fn);
10963     } else {
10964       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10965       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10966       // two vector names depending on the use of the clause
10967       // `[not]inbranch`.
10968       switch (State) {
10969       case OMPDeclareSimdDeclAttr::BS_Undefined:
10970         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10971                                   OutputBecomesInput, Fn);
10972         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10973                                   OutputBecomesInput, Fn);
10974         break;
10975       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10976         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10977                                   OutputBecomesInput, Fn);
10978         break;
10979       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10980         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10981                                   OutputBecomesInput, Fn);
10982         break;
10983       }
10984     }
10985   }
10986 }
10987 
10988 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10989                                               llvm::Function *Fn) {
10990   ASTContext &C = CGM.getContext();
10991   FD = FD->getMostRecentDecl();
10992   // Map params to their positions in function decl.
10993   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10994   if (isa<CXXMethodDecl>(FD))
10995     ParamPositions.try_emplace(FD, 0);
10996   unsigned ParamPos = ParamPositions.size();
10997   for (const ParmVarDecl *P : FD->parameters()) {
10998     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10999     ++ParamPos;
11000   }
11001   while (FD) {
11002     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11003       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11004       // Mark uniform parameters.
11005       for (const Expr *E : Attr->uniforms()) {
11006         E = E->IgnoreParenImpCasts();
11007         unsigned Pos;
11008         if (isa<CXXThisExpr>(E)) {
11009           Pos = ParamPositions[FD];
11010         } else {
11011           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11012                                 ->getCanonicalDecl();
11013           Pos = ParamPositions[PVD];
11014         }
11015         ParamAttrs[Pos].Kind = Uniform;
11016       }
11017       // Get alignment info.
11018       auto NI = Attr->alignments_begin();
11019       for (const Expr *E : Attr->aligneds()) {
11020         E = E->IgnoreParenImpCasts();
11021         unsigned Pos;
11022         QualType ParmTy;
11023         if (isa<CXXThisExpr>(E)) {
11024           Pos = ParamPositions[FD];
11025           ParmTy = E->getType();
11026         } else {
11027           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11028                                 ->getCanonicalDecl();
11029           Pos = ParamPositions[PVD];
11030           ParmTy = PVD->getType();
11031         }
11032         ParamAttrs[Pos].Alignment =
11033             (*NI)
11034                 ? (*NI)->EvaluateKnownConstInt(C)
11035                 : llvm::APSInt::getUnsigned(
11036                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11037                           .getQuantity());
11038         ++NI;
11039       }
11040       // Mark linear parameters.
11041       auto SI = Attr->steps_begin();
11042       auto MI = Attr->modifiers_begin();
11043       for (const Expr *E : Attr->linears()) {
11044         E = E->IgnoreParenImpCasts();
11045         unsigned Pos;
11046         // Rescaling factor needed to compute the linear parameter
11047         // value in the mangled name.
11048         unsigned PtrRescalingFactor = 1;
11049         if (isa<CXXThisExpr>(E)) {
11050           Pos = ParamPositions[FD];
11051         } else {
11052           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11053                                 ->getCanonicalDecl();
11054           Pos = ParamPositions[PVD];
11055           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11056             PtrRescalingFactor = CGM.getContext()
11057                                      .getTypeSizeInChars(P->getPointeeType())
11058                                      .getQuantity();
11059         }
11060         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11061         ParamAttr.Kind = Linear;
11062         // Assuming a stride of 1, for `linear` without modifiers.
11063         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11064         if (*SI) {
11065           Expr::EvalResult Result;
11066           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11067             if (const auto *DRE =
11068                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11069               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11070                 ParamAttr.Kind = LinearWithVarStride;
11071                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11072                     ParamPositions[StridePVD->getCanonicalDecl()]);
11073               }
11074             }
11075           } else {
11076             ParamAttr.StrideOrArg = Result.Val.getInt();
11077           }
11078         }
11079         // If we are using a linear clause on a pointer, we need to
11080         // rescale the value of linear_step with the byte size of the
11081         // pointee type.
11082         if (Linear == ParamAttr.Kind)
11083           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11084         ++SI;
11085         ++MI;
11086       }
11087       llvm::APSInt VLENVal;
11088       SourceLocation ExprLoc;
11089       const Expr *VLENExpr = Attr->getSimdlen();
11090       if (VLENExpr) {
11091         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11092         ExprLoc = VLENExpr->getExprLoc();
11093       }
11094       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11095       if (CGM.getTriple().isX86()) {
11096         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11097       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11098         unsigned VLEN = VLENVal.getExtValue();
11099         StringRef MangledName = Fn->getName();
11100         if (CGM.getTarget().hasFeature("sve"))
11101           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11102                                          MangledName, 's', 128, Fn, ExprLoc);
11103         if (CGM.getTarget().hasFeature("neon"))
11104           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11105                                          MangledName, 'n', 128, Fn, ExprLoc);
11106       }
11107     }
11108     FD = FD->getPreviousDecl();
11109   }
11110 }
11111 
11112 namespace {
11113 /// Cleanup action for doacross support.
11114 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11115 public:
11116   static const int DoacrossFinArgs = 2;
11117 
11118 private:
11119   llvm::FunctionCallee RTLFn;
11120   llvm::Value *Args[DoacrossFinArgs];
11121 
11122 public:
11123   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11124                     ArrayRef<llvm::Value *> CallArgs)
11125       : RTLFn(RTLFn) {
11126     assert(CallArgs.size() == DoacrossFinArgs);
11127     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11128   }
11129   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11130     if (!CGF.HaveInsertPoint())
11131       return;
11132     CGF.EmitRuntimeCall(RTLFn, Args);
11133   }
11134 };
11135 } // namespace
11136 
11137 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11138                                        const OMPLoopDirective &D,
11139                                        ArrayRef<Expr *> NumIterations) {
11140   if (!CGF.HaveInsertPoint())
11141     return;
11142 
11143   ASTContext &C = CGM.getContext();
11144   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11145   RecordDecl *RD;
11146   if (KmpDimTy.isNull()) {
11147     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
11148     //  kmp_int64 lo; // lower
11149     //  kmp_int64 up; // upper
11150     //  kmp_int64 st; // stride
11151     // };
11152     RD = C.buildImplicitRecord("kmp_dim");
11153     RD->startDefinition();
11154     addFieldToRecordDecl(C, RD, Int64Ty);
11155     addFieldToRecordDecl(C, RD, Int64Ty);
11156     addFieldToRecordDecl(C, RD, Int64Ty);
11157     RD->completeDefinition();
11158     KmpDimTy = C.getRecordType(RD);
11159   } else {
11160     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11161   }
11162   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11163   QualType ArrayTy =
11164       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
11165 
11166   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11167   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11168   enum { LowerFD = 0, UpperFD, StrideFD };
11169   // Fill dims with data.
11170   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11171     LValue DimsLVal = CGF.MakeAddrLValue(
11172         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11173     // dims.upper = num_iterations;
11174     LValue UpperLVal = CGF.EmitLValueForField(
11175         DimsLVal, *std::next(RD->field_begin(), UpperFD));
11176     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11177         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11178         Int64Ty, NumIterations[I]->getExprLoc());
11179     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11180     // dims.stride = 1;
11181     LValue StrideLVal = CGF.EmitLValueForField(
11182         DimsLVal, *std::next(RD->field_begin(), StrideFD));
11183     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11184                           StrideLVal);
11185   }
11186 
11187   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11188   // kmp_int32 num_dims, struct kmp_dim * dims);
11189   llvm::Value *Args[] = {
11190       emitUpdateLocation(CGF, D.getBeginLoc()),
11191       getThreadID(CGF, D.getBeginLoc()),
11192       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11193       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11194           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11195           CGM.VoidPtrTy)};
11196 
11197   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11198       CGM.getModule(), OMPRTL___kmpc_doacross_init);
11199   CGF.EmitRuntimeCall(RTLFn, Args);
11200   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11201       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11202   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11203       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11204   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11205                                              llvm::makeArrayRef(FiniArgs));
11206 }
11207 
11208 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11209                                           const OMPDependClause *C) {
11210   QualType Int64Ty =
11211       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11212   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11213   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11214       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11215   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11216   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11217     const Expr *CounterVal = C->getLoopData(I);
11218     assert(CounterVal);
11219     llvm::Value *CntVal = CGF.EmitScalarConversion(
11220         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11221         CounterVal->getExprLoc());
11222     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11223                           /*Volatile=*/false, Int64Ty);
11224   }
11225   llvm::Value *Args[] = {
11226       emitUpdateLocation(CGF, C->getBeginLoc()),
11227       getThreadID(CGF, C->getBeginLoc()),
11228       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11229   llvm::FunctionCallee RTLFn;
11230   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11231     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11232                                                   OMPRTL___kmpc_doacross_post);
11233   } else {
11234     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11235     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11236                                                   OMPRTL___kmpc_doacross_wait);
11237   }
11238   CGF.EmitRuntimeCall(RTLFn, Args);
11239 }
11240 
11241 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11242                                llvm::FunctionCallee Callee,
11243                                ArrayRef<llvm::Value *> Args) const {
11244   assert(Loc.isValid() && "Outlined function call location must be valid.");
11245   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11246 
11247   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11248     if (Fn->doesNotThrow()) {
11249       CGF.EmitNounwindRuntimeCall(Fn, Args);
11250       return;
11251     }
11252   }
11253   CGF.EmitRuntimeCall(Callee, Args);
11254 }
11255 
/// Emits a call to the outlined function \p OutlinedFn with arguments
/// \p Args at location \p Loc. This implementation forwards directly to
/// emitCall().
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11261 
11262 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11263   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11264     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11265       HasEmittedDeclareTargetRegion = true;
11266 }
11267 
/// Returns the address of the parameter in the current function. This
/// implementation returns the address of the local variable \p NativeParam
/// and does not use \p TargetParam.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11273 
11274 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11275                                                    const VarDecl *VD) {
11276   if (!VD)
11277     return Address::invalid();
11278   Address UntiedAddr = Address::invalid();
11279   Address UntiedRealAddr = Address::invalid();
11280   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11281   if (It != FunctionToUntiedTaskStackMap.end()) {
11282     const UntiedLocalVarsAddressesMap &UntiedData =
11283         UntiedLocalVarsStack[It->second];
11284     auto I = UntiedData.find(VD);
11285     if (I != UntiedData.end()) {
11286       UntiedAddr = I->second.first;
11287       UntiedRealAddr = I->second.second;
11288     }
11289   }
11290   const VarDecl *CVD = VD->getCanonicalDecl();
11291   if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11292     // Use the default allocation.
11293     if (!isAllocatableDecl(VD))
11294       return UntiedAddr;
11295     llvm::Value *Size;
11296     CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11297     if (CVD->getType()->isVariablyModifiedType()) {
11298       Size = CGF.getTypeSize(CVD->getType());
11299       // Align the size: ((size + align - 1) / align) * align
11300       Size = CGF.Builder.CreateNUWAdd(
11301           Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11302       Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11303       Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11304     } else {
11305       CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11306       Size = CGM.getSize(Sz.alignTo(Align));
11307     }
11308     llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11309     const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11310     assert(AA->getAllocator() &&
11311            "Expected allocator expression for non-default allocator.");
11312     llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
11313     // According to the standard, the original allocator type is a enum
11314     // (integer). Convert to pointer type, if required.
11315     Allocator = CGF.EmitScalarConversion(
11316         Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
11317         AA->getAllocator()->getExprLoc());
11318     llvm::Value *Args[] = {ThreadID, Size, Allocator};
11319 
11320     llvm::Value *Addr =
11321         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11322                                 CGM.getModule(), OMPRTL___kmpc_alloc),
11323                             Args, getName({CVD->getName(), ".void.addr"}));
11324     llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11325         CGM.getModule(), OMPRTL___kmpc_free);
11326     QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11327     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11328         Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11329     if (UntiedAddr.isValid())
11330       CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11331 
11332     // Cleanup action for allocate support.
11333     class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11334       llvm::FunctionCallee RTLFn;
11335       unsigned LocEncoding;
11336       Address Addr;
11337       const Expr *Allocator;
11338 
11339     public:
11340       OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
11341                            Address Addr, const Expr *Allocator)
11342           : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11343             Allocator(Allocator) {}
11344       void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11345         if (!CGF.HaveInsertPoint())
11346           return;
11347         llvm::Value *Args[3];
11348         Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11349             CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11350         Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11351             Addr.getPointer(), CGF.VoidPtrTy);
11352         llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
11353         // According to the standard, the original allocator type is a enum
11354         // (integer). Convert to pointer type, if required.
11355         AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11356                                             CGF.getContext().VoidPtrTy,
11357                                             Allocator->getExprLoc());
11358         Args[2] = AllocVal;
11359 
11360         CGF.EmitRuntimeCall(RTLFn, Args);
11361       }
11362     };
11363     Address VDAddr =
11364         UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
11365     CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11366         NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11367         VDAddr, AA->getAllocator());
11368     if (UntiedRealAddr.isValid())
11369       if (auto *Region =
11370               dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11371         Region->emitUntiedSwitch(CGF);
11372     return VDAddr;
11373   }
11374   return UntiedAddr;
11375 }
11376 
11377 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11378                                              const VarDecl *VD) const {
11379   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11380   if (It == FunctionToUntiedTaskStackMap.end())
11381     return false;
11382   return UntiedLocalVarsStack[It->second].count(VD) > 0;
11383 }
11384 
11385 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11386     CodeGenModule &CGM, const OMPLoopDirective &S)
11387     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11388   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11389   if (!NeedToPush)
11390     return;
11391   NontemporalDeclsSet &DS =
11392       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11393   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11394     for (const Stmt *Ref : C->private_refs()) {
11395       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11396       const ValueDecl *VD;
11397       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11398         VD = DRE->getDecl();
11399       } else {
11400         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11401         assert((ME->isImplicitCXXThis() ||
11402                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11403                "Expected member of current class.");
11404         VD = ME->getMemberDecl();
11405       }
11406       DS.insert(VD);
11407     }
11408   }
11409 }
11410 
11411 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11412   if (!NeedToPush)
11413     return;
11414   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11415 }
11416 
11417 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11418     CodeGenFunction &CGF,
11419     const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
11420                          std::pair<Address, Address>> &LocalVars)
11421     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11422   if (!NeedToPush)
11423     return;
11424   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11425       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11426   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11427 }
11428 
11429 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11430   if (!NeedToPush)
11431     return;
11432   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11433 }
11434 
11435 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11436   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11437 
11438   return llvm::any_of(
11439       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11440       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11441 }
11442 
11443 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11444     const OMPExecutableDirective &S,
11445     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11446     const {
11447   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11448   // Vars in target/task regions must be excluded completely.
11449   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11450       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11451     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11452     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11453     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11454     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11455       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11456         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11457     }
11458   }
11459   // Exclude vars in private clauses.
11460   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11461     for (const Expr *Ref : C->varlists()) {
11462       if (!Ref->getType()->isScalarType())
11463         continue;
11464       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11465       if (!DRE)
11466         continue;
11467       NeedToCheckForLPCs.insert(DRE->getDecl());
11468     }
11469   }
11470   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11471     for (const Expr *Ref : C->varlists()) {
11472       if (!Ref->getType()->isScalarType())
11473         continue;
11474       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11475       if (!DRE)
11476         continue;
11477       NeedToCheckForLPCs.insert(DRE->getDecl());
11478     }
11479   }
11480   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11481     for (const Expr *Ref : C->varlists()) {
11482       if (!Ref->getType()->isScalarType())
11483         continue;
11484       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11485       if (!DRE)
11486         continue;
11487       NeedToCheckForLPCs.insert(DRE->getDecl());
11488     }
11489   }
11490   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11491     for (const Expr *Ref : C->varlists()) {
11492       if (!Ref->getType()->isScalarType())
11493         continue;
11494       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11495       if (!DRE)
11496         continue;
11497       NeedToCheckForLPCs.insert(DRE->getDecl());
11498     }
11499   }
11500   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11501     for (const Expr *Ref : C->varlists()) {
11502       if (!Ref->getType()->isScalarType())
11503         continue;
11504       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11505       if (!DRE)
11506         continue;
11507       NeedToCheckForLPCs.insert(DRE->getDecl());
11508     }
11509   }
11510   for (const Decl *VD : NeedToCheckForLPCs) {
11511     for (const LastprivateConditionalData &Data :
11512          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11513       if (Data.DeclToUniqueName.count(VD) > 0) {
11514         if (!Data.Disabled)
11515           NeedToAddForLPCsAsDisabled.insert(VD);
11516         break;
11517       }
11518     }
11519   }
11520 }
11521 
11522 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11523     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11524     : CGM(CGF.CGM),
11525       Action((CGM.getLangOpts().OpenMP >= 50 &&
11526               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11527                            [](const OMPLastprivateClause *C) {
11528                              return C->getKind() ==
11529                                     OMPC_LASTPRIVATE_conditional;
11530                            }))
11531                  ? ActionToDo::PushAsLastprivateConditional
11532                  : ActionToDo::DoNotPush) {
11533   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11534   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11535     return;
11536   assert(Action == ActionToDo::PushAsLastprivateConditional &&
11537          "Expected a push action.");
11538   LastprivateConditionalData &Data =
11539       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11540   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11541     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11542       continue;
11543 
11544     for (const Expr *Ref : C->varlists()) {
11545       Data.DeclToUniqueName.insert(std::make_pair(
11546           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11547           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11548     }
11549   }
11550   Data.IVLVal = IVLVal;
11551   Data.Fn = CGF.CurFn;
11552 }
11553 
11554 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11555     CodeGenFunction &CGF, const OMPExecutableDirective &S)
11556     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11557   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11558   if (CGM.getLangOpts().OpenMP < 50)
11559     return;
11560   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11561   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11562   if (!NeedToAddForLPCsAsDisabled.empty()) {
11563     Action = ActionToDo::DisableLastprivateConditional;
11564     LastprivateConditionalData &Data =
11565         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11566     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11567       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11568     Data.Fn = CGF.CurFn;
11569     Data.Disabled = true;
11570   }
11571 }
11572 
11573 CGOpenMPRuntime::LastprivateConditionalRAII
11574 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
11575     CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11576   return LastprivateConditionalRAII(CGF, S);
11577 }
11578 
11579 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11580   if (CGM.getLangOpts().OpenMP < 50)
11581     return;
11582   if (Action == ActionToDo::DisableLastprivateConditional) {
11583     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11584            "Expected list of disabled private vars.");
11585     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11586   }
11587   if (Action == ActionToDo::PushAsLastprivateConditional) {
11588     assert(
11589         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11590         "Expected list of lastprivate conditional vars.");
11591     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11592   }
11593 }
11594 
11595 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11596                                                         const VarDecl *VD) {
11597   ASTContext &C = CGM.getContext();
11598   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
11599   if (I == LastprivateConditionalToTypes.end())
11600     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11601   QualType NewType;
11602   const FieldDecl *VDField;
11603   const FieldDecl *FiredField;
11604   LValue BaseLVal;
11605   auto VI = I->getSecond().find(VD);
11606   if (VI == I->getSecond().end()) {
11607     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
11608     RD->startDefinition();
11609     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11610     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11611     RD->completeDefinition();
11612     NewType = C.getRecordType(RD);
11613     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11614     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11615     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11616   } else {
11617     NewType = std::get<0>(VI->getSecond());
11618     VDField = std::get<1>(VI->getSecond());
11619     FiredField = std::get<2>(VI->getSecond());
11620     BaseLVal = std::get<3>(VI->getSecond());
11621   }
11622   LValue FiredLVal =
11623       CGF.EmitLValueForField(BaseLVal, FiredField);
11624   CGF.EmitStoreOfScalar(
11625       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11626       FiredLVal);
11627   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
11628 }
11629 
11630 namespace {
11631 /// Checks if the lastprivate conditional variable is referenced in LHS.
11632 class LastprivateConditionalRefChecker final
11633     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11634   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11635   const Expr *FoundE = nullptr;
11636   const Decl *FoundD = nullptr;
11637   StringRef UniqueDeclName;
11638   LValue IVLVal;
11639   llvm::Function *FoundFn = nullptr;
11640   SourceLocation Loc;
11641 
11642 public:
11643   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11644     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11645          llvm::reverse(LPM)) {
11646       auto It = D.DeclToUniqueName.find(E->getDecl());
11647       if (It == D.DeclToUniqueName.end())
11648         continue;
11649       if (D.Disabled)
11650         return false;
11651       FoundE = E;
11652       FoundD = E->getDecl()->getCanonicalDecl();
11653       UniqueDeclName = It->second;
11654       IVLVal = D.IVLVal;
11655       FoundFn = D.Fn;
11656       break;
11657     }
11658     return FoundE == E;
11659   }
11660   bool VisitMemberExpr(const MemberExpr *E) {
11661     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11662       return false;
11663     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11664          llvm::reverse(LPM)) {
11665       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11666       if (It == D.DeclToUniqueName.end())
11667         continue;
11668       if (D.Disabled)
11669         return false;
11670       FoundE = E;
11671       FoundD = E->getMemberDecl()->getCanonicalDecl();
11672       UniqueDeclName = It->second;
11673       IVLVal = D.IVLVal;
11674       FoundFn = D.Fn;
11675       break;
11676     }
11677     return FoundE == E;
11678   }
11679   bool VisitStmt(const Stmt *S) {
11680     for (const Stmt *Child : S->children()) {
11681       if (!Child)
11682         continue;
11683       if (const auto *E = dyn_cast<Expr>(Child))
11684         if (!E->isGLValue())
11685           continue;
11686       if (Visit(Child))
11687         return true;
11688     }
11689     return false;
11690   }
11691   explicit LastprivateConditionalRefChecker(
11692       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11693       : LPM(LPM) {}
11694   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11695   getFoundData() const {
11696     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11697   }
11698 };
11699 } // namespace
11700 
11701 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
11702                                                        LValue IVLVal,
11703                                                        StringRef UniqueDeclName,
11704                                                        LValue LVal,
11705                                                        SourceLocation Loc) {
11706   // Last updated loop counter for the lastprivate conditional var.
11707   // int<xx> last_iv = 0;
11708   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
11709   llvm::Constant *LastIV =
11710       getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
11711   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
11712       IVLVal.getAlignment().getAsAlign());
11713   LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
11714 
11715   // Last value of the lastprivate conditional.
11716   // decltype(priv_a) last_a;
11717   llvm::Constant *Last = getOrCreateInternalVariable(
11718       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
11719   cast<llvm::GlobalVariable>(Last)->setAlignment(
11720       LVal.getAlignment().getAsAlign());
11721   LValue LastLVal =
11722       CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());
11723 
11724   // Global loop counter. Required to handle inner parallel-for regions.
11725   // iv
11726   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
11727 
11728   // #pragma omp critical(a)
11729   // if (last_iv <= iv) {
11730   //   last_iv = iv;
11731   //   last_a = priv_a;
11732   // }
11733   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11734                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
11735     Action.Enter(CGF);
11736     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
11737     // (last_iv <= iv) ? Check if the variable is updated and store new
11738     // value in global var.
11739     llvm::Value *CmpRes;
11740     if (IVLVal.getType()->isSignedIntegerType()) {
11741       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
11742     } else {
11743       assert(IVLVal.getType()->isUnsignedIntegerType() &&
11744              "Loop iteration variable must be integer.");
11745       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
11746     }
11747     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
11748     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
11749     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
11750     // {
11751     CGF.EmitBlock(ThenBB);
11752 
11753     //   last_iv = iv;
11754     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
11755 
11756     //   last_a = priv_a;
11757     switch (CGF.getEvaluationKind(LVal.getType())) {
11758     case TEK_Scalar: {
11759       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
11760       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
11761       break;
11762     }
11763     case TEK_Complex: {
11764       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
11765       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
11766       break;
11767     }
11768     case TEK_Aggregate:
11769       llvm_unreachable(
11770           "Aggregates are not supported in lastprivate conditional.");
11771     }
11772     // }
11773     CGF.EmitBranch(ExitBB);
11774     // There is no need to emit line number for unconditional branch.
11775     (void)ApplyDebugLocation::CreateEmpty(CGF);
11776     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
11777   };
11778 
11779   if (CGM.getLangOpts().OpenMPSimd) {
11780     // Do not emit as a critical region as no parallel region could be emitted.
11781     RegionCodeGenTy ThenRCG(CodeGen);
11782     ThenRCG(CGF);
11783   } else {
11784     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
11785   }
11786 }
11787 
11788 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11789                                                          const Expr *LHS) {
11790   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11791     return;
11792   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11793   if (!Checker.Visit(LHS))
11794     return;
11795   const Expr *FoundE;
11796   const Decl *FoundD;
11797   StringRef UniqueDeclName;
11798   LValue IVLVal;
11799   llvm::Function *FoundFn;
11800   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11801       Checker.getFoundData();
11802   if (FoundFn != CGF.CurFn) {
11803     // Special codegen for inner parallel regions.
11804     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11805     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11806     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11807            "Lastprivate conditional is not found in outer region.");
11808     QualType StructTy = std::get<0>(It->getSecond());
11809     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
11810     LValue PrivLVal = CGF.EmitLValue(FoundE);
11811     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11812         PrivLVal.getAddress(CGF),
11813         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
11814     LValue BaseLVal =
11815         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11816     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11817     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11818                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11819                         FiredLVal, llvm::AtomicOrdering::Unordered,
11820                         /*IsVolatile=*/true, /*isInit=*/false);
11821     return;
11822   }
11823 
11824   // Private address of the lastprivate conditional in the current context.
11825   // priv_a
11826   LValue LVal = CGF.EmitLValue(FoundE);
11827   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11828                                    FoundE->getExprLoc());
11829 }
11830 
11831 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11832     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11833     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11834   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11835     return;
11836   auto Range = llvm::reverse(LastprivateConditionalStack);
11837   auto It = llvm::find_if(
11838       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11839   if (It == Range.end() || It->Fn != CGF.CurFn)
11840     return;
11841   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11842   assert(LPCI != LastprivateConditionalToTypes.end() &&
11843          "Lastprivates must be registered already.");
11844   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11845   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11846   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11847   for (const auto &Pair : It->DeclToUniqueName) {
11848     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11849     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
11850       continue;
11851     auto I = LPCI->getSecond().find(Pair.first);
11852     assert(I != LPCI->getSecond().end() &&
11853            "Lastprivate must be rehistered already.");
11854     // bool Cmp = priv_a.Fired != 0;
11855     LValue BaseLVal = std::get<3>(I->getSecond());
11856     LValue FiredLVal =
11857         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11858     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11859     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11860     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11861     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11862     // if (Cmp) {
11863     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11864     CGF.EmitBlock(ThenBB);
11865     Address Addr = CGF.GetAddrOfLocalVar(VD);
11866     LValue LVal;
11867     if (VD->getType()->isReferenceType())
11868       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11869                                            AlignmentSource::Decl);
11870     else
11871       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11872                                 AlignmentSource::Decl);
11873     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11874                                      D.getBeginLoc());
11875     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11876     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11877     // }
11878   }
11879 }
11880 
11881 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11882     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11883     SourceLocation Loc) {
11884   if (CGF.getLangOpts().OpenMP < 50)
11885     return;
11886   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11887   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11888          "Unknown lastprivate conditional variable.");
11889   StringRef UniqueName = It->second;
11890   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11891   // The variable was not updated in the region - exit.
11892   if (!GV)
11893     return;
11894   LValue LPLVal = CGF.MakeAddrLValue(
11895       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11896   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11897   CGF.EmitStoreOfScalar(Res, PrivLVal);
11898 }
11899 
11900 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
11901     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11902     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
11903   llvm_unreachable("Not supported in SIMD-only mode");
11904 }
11905 
11906 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
11907     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11908     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
11909   llvm_unreachable("Not supported in SIMD-only mode");
11910 }
11911 
11912 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
11913     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11914     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
11915     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
11916     bool Tied, unsigned &NumberOfParts) {
11917   llvm_unreachable("Not supported in SIMD-only mode");
11918 }
11919 
11920 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
11921                                            SourceLocation Loc,
11922                                            llvm::Function *OutlinedFn,
11923                                            ArrayRef<llvm::Value *> CapturedVars,
11924                                            const Expr *IfCond) {
11925   llvm_unreachable("Not supported in SIMD-only mode");
11926 }
11927 
11928 void CGOpenMPSIMDRuntime::emitCriticalRegion(
11929     CodeGenFunction &CGF, StringRef CriticalName,
11930     const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
11931     const Expr *Hint) {
11932   llvm_unreachable("Not supported in SIMD-only mode");
11933 }
11934 
11935 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
11936                                            const RegionCodeGenTy &MasterOpGen,
11937                                            SourceLocation Loc) {
11938   llvm_unreachable("Not supported in SIMD-only mode");
11939 }
11940 
11941 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
11942                                             SourceLocation Loc) {
11943   llvm_unreachable("Not supported in SIMD-only mode");
11944 }
11945 
11946 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
11947     CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
11948     SourceLocation Loc) {
11949   llvm_unreachable("Not supported in SIMD-only mode");
11950 }
11951 
11952 void CGOpenMPSIMDRuntime::emitSingleRegion(
11953     CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
11954     SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
11955     ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
11956     ArrayRef<const Expr *> AssignmentOps) {
11957   llvm_unreachable("Not supported in SIMD-only mode");
11958 }
11959 
11960 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
11961                                             const RegionCodeGenTy &OrderedOpGen,
11962                                             SourceLocation Loc,
11963                                             bool IsThreads) {
11964   llvm_unreachable("Not supported in SIMD-only mode");
11965 }
11966 
11967 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
11968                                           SourceLocation Loc,
11969                                           OpenMPDirectiveKind Kind,
11970                                           bool EmitChecks,
11971                                           bool ForceSimpleCall) {
11972   llvm_unreachable("Not supported in SIMD-only mode");
11973 }
11974 
11975 void CGOpenMPSIMDRuntime::emitForDispatchInit(
11976     CodeGenFunction &CGF, SourceLocation Loc,
11977     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
11978     bool Ordered, const DispatchRTInput &DispatchValues) {
11979   llvm_unreachable("Not supported in SIMD-only mode");
11980 }
11981 
11982 void CGOpenMPSIMDRuntime::emitForStaticInit(
11983     CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
11984     const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
11985   llvm_unreachable("Not supported in SIMD-only mode");
11986 }
11987 
11988 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
11989     CodeGenFunction &CGF, SourceLocation Loc,
11990     OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
11991   llvm_unreachable("Not supported in SIMD-only mode");
11992 }
11993 
11994 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
11995                                                      SourceLocation Loc,
11996                                                      unsigned IVSize,
11997                                                      bool IVSigned) {
11998   llvm_unreachable("Not supported in SIMD-only mode");
11999 }
12000 
12001 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12002                                               SourceLocation Loc,
12003                                               OpenMPDirectiveKind DKind) {
12004   llvm_unreachable("Not supported in SIMD-only mode");
12005 }
12006 
/// SIMD-only stub for emitForNext. Although the signature returns an
/// llvm::Value *, no value is ever produced: the only statement is
/// llvm_unreachable, and none of the parameters are read.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12014 
/// SIMD-only stub for 'num_threads' clause emission. The arguments are
/// ignored and the function unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12020 
/// SIMD-only stub for 'proc_bind' clause emission. The arguments are ignored
/// and the function unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12026 
/// SIMD-only stub for getAddrOfThreadPrivate. Declared to return an Address,
/// but no Address is ever constructed: the body is only llvm_unreachable and
/// the parameters are not used.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12033 
/// SIMD-only stub for emitThreadPrivateVarDefinition. Declared to return an
/// llvm::Function *, but nothing is returned: the body is only
/// llvm_unreachable and the parameters (including the optional CGF pointer)
/// are never dereferenced.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12039 
/// SIMD-only stub for getAddrOfArtificialThreadPrivate. No Address is
/// produced; the function ignores its arguments and hits llvm_unreachable.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12044 
/// SIMD-only stub for flush emission. The variable list, location and atomic
/// ordering are all ignored; the body is only llvm_unreachable.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12051 
/// SIMD-only stub for task-call emission. None of the arguments (directive,
/// task function, shareds, if-condition, task data) are examined; the
/// function unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12060 
/// SIMD-only stub for taskloop-call emission. Takes an OMPLoopDirective but
/// does not examine it or any other argument; the body is only
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12067 
/// SIMD-only reduction emission. Unlike most CGOpenMPSIMDRuntime entry points
/// in this part of the file, this one does real work: it checks that the
/// caller asked for a simple reduction and then forwards every argument,
/// unchanged, to CGOpenMPRuntime::emitReduction.
///
/// \param Options Must have SimpleReduction set; this is enforced only by an
/// assert, so the check is compiled out when assertions are disabled.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  // Explicitly qualified call: dispatches to the CGOpenMPRuntime
  // implementation rather than re-entering this override.
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12076 
/// SIMD-only stub for emitTaskReductionInit. Declared to return an
/// llvm::Value *, but no value is produced: the arguments are ignored and the
/// body is only llvm_unreachable.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12082 
/// SIMD-only stub for emitTaskReductionFini. The arguments are ignored and
/// the function unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12088 
/// SIMD-only stub for emitTaskReductionFixups. RCG is taken by non-const
/// reference but is not modified here: the body is only llvm_unreachable and
/// no argument is touched.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12095 
/// SIMD-only stub for getTaskReductionItem. Declared to return an Address,
/// but none is constructed: the arguments are ignored and the body is only
/// llvm_unreachable.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12102 
/// SIMD-only stub for taskwait emission. The arguments are ignored and the
/// function unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12107 
/// SIMD-only stub for cancellation-point emission. CancelRegion and the
/// other arguments are ignored; the body is only llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12113 
/// SIMD-only stub for cancel emission. IfCond is not evaluated or even
/// null-checked; the function ignores every argument and hits
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12119 
/// SIMD-only stub for target outlined-function emission. OutlinedFn and
/// OutlinedFnID are reference-to-pointer parameters, but this implementation
/// never assigns them: the body is only llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12126 
/// SIMD-only stub for target-call emission. The SizeEmitter callback is never
/// invoked and no other argument is examined; the function unconditionally
/// hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12136 
/// SIMD-only stub for emitTargetFunctions. Declared to return bool, but no
/// value is returned: GD is ignored and the body is only llvm_unreachable.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12140 
/// SIMD-only stub for emitTargetGlobalVariable. Declared to return bool, but
/// no value is returned: GD is ignored and the body is only llvm_unreachable.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12144 
/// SIMD-only implementation of emitTargetGlobal. Unlike the neighbouring
/// stubs this one is callable: it ignores GD and always returns false.
/// NOTE(review): presumably false tells the caller that the OpenMP runtime
/// did not handle the declaration, so regular emission proceeds - confirm
/// against the contract documented on the base-class declaration.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
12148 
/// SIMD-only stub for teams-call emission. The outlined function and
/// captured variables are ignored; the body is only llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12156 
/// SIMD-only stub for 'num_teams' clause emission. Neither the NumTeams nor
/// the ThreadLimit expression is evaluated; the function unconditionally
/// hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12163 
/// SIMD-only stub for target-data call emission. Info is taken by non-const
/// reference but is not written, and the CodeGen callback is never run: the
/// body is only llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12169 
/// SIMD-only stub for stand-alone target-data call emission. The directive,
/// if-condition and device expression are ignored; the body is only
/// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12175 
/// SIMD-only stub for doacross-init emission. The loop directive and the
/// NumIterations expressions are ignored; the body is only llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12181 
/// SIMD-only stub for doacross-ordered emission. The depend clause pointer is
/// never dereferenced; the function unconditionally hits llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12186 
/// SIMD-only stub for translateParameter. Declared to return a
/// const VarDecl *, but nothing is returned: FD and NativeParam are ignored
/// and the body is only llvm_unreachable.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12192 
/// SIMD-only stub for getParameterAddress. Declared to return an Address,
/// but none is constructed: the arguments are ignored and the body is only
/// llvm_unreachable.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12199