1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
48 /// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build a region that captures the statement \p CS (used by the outlined
  /// and target region kinds).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build a region without an associated captured statement (used by the
  /// inlined region kind).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit the next task switching point for 'untied' tasks; default is a
  /// no-op, overridden by task regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Kind of this codegen region (outlined/task/inlined/target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// OpenMP directive this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Flag supplied at construction; indicates the associated construct may be
  /// cancelled.
  bool hasCancel() const { return HasCancel; }

  /// RTTI support: every OpenMP region uses the CR_OpenMP capture-region kind.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
109 
110 /// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable holding the global thread id; must be
  /// non-null (asserted below).
  /// \param HelperName Name used for the generated capture helper.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// RTTI support: matches only parallel-outlined OpenMP regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the generated capture helper function.
  StringRef HelperName;
};
142 
143 /// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action implementing the task switching points required for
  /// 'untied' tasks. For tied tasks (constructed with Tied == true) every
  /// hook is a no-op.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Parameter holding a pointer to the current part id of the task.
    const VarDecl *PartIDVar;
    /// Codegen sequence run at each switching point before leaving the task.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; created lazily in Enter() for untied tasks.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Default destination leaves the task body through the cleanups.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes at the very beginning of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task switching point: store the next part id (the current
    /// number of switch cases), run \p UntiedCodeGen, branch out through the
    /// cleanups, and register the continuation block as a new case of the
    /// switch created in Enter().
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (the switch case count).
    /// NOTE(review): dereferences UntiedSwitch, so this is only valid after
    /// Enter() has run for an untied task — confirm callers respect that.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Forward task switching to the untied-task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// RTTI support: matches only task-outlined OpenMP regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
231 
232 /// API for inlined captured statement code generation in OpenMP
233 /// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI The CapturedStmtInfo that was active before this inlined
  /// region; most queries below delegate to it when it is itself an OpenMP
  /// region.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Forward the context parameter to the enclosing OpenMP region, if any.
  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  /// Field holding the captured 'this', delegated to the enclosing region.
  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  /// NOTE(review): unlike the other overrides this delegates to getOldCSI()
  /// (any previous CGCapturedStmtInfo), not only to an outer OpenMP region.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  /// Forward task switching to the enclosing region; no-op at top level.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// The CapturedStmtInfo that was active before this region was entered.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  /// RTTI support: matches only inlined OpenMP regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
314 
315 /// API for captured statement code generation in OpenMP target
316 /// constructs. For this captures, implicit parameters are used instead of the
317 /// captured fields. The name of the target region has to be unique in a given
318 /// application so it is provided by the client, because only the client has
319 /// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param HelperName Client-provided unique name for the target region.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// RTTI support: matches only target OpenMP regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique name for the target region, supplied by the client.
  StringRef HelperName;
};
343 
/// Placeholder RegionCodeGenTy callback for regions that must never emit a
/// body (used by CGOpenMPInnerExprInfo below).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
347 /// API for generation of expressions captured in a innermost OpenMP
348 /// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      // Locals and parameters are usable directly; no privatization needed.
      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      // Build a reference to the captured global and privatize it to the
      // address the reference evaluates to.
      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  /// NOTE(review): currently equivalent to the base-class lookup — returns
  /// the outer region's field if present, nullptr otherwise.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  /// Never matched by RTTI queries; this info is used only transiently.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
406 
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved state, restored in the destructor.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  ///
  /// Installs a CGOpenMPInlinedRegionInfo as the current CapturedStmtInfo and
  /// stashes the lambda-capture and block state so the inlined region does
  /// not see the enclosing lambda/block captures.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
443 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  /// Note: intentionally shares the 0x40 value with OMP_IDENT_BARRIER_IMPL.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
472 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// Flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// No requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Device IDs with reserved meanings for the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
498 
499 /// Describes ident structure that describes a source location.
500 /// All descriptions are taken from
501 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
502 /// Original structure:
503 /// typedef struct ident {
504 ///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
505 ///                                  see above  */
506 ///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
507 ///                                  KMP_IDENT_KMPC identifies this union
508 ///                                  member  */
509 ///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
510 ///                                  see above */
511 ///#if USE_ITT_BUILD
512 ///                            /*  but currently used for storing
513 ///                                region-specific ITT */
514 ///                            /*  contextual information. */
515 ///#endif /* USE_ITT_BUILD */
516 ///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
517 ///                                 C++  */
518 ///    char const *psource;    /**< String describing the source location.
519 ///                            The string is composed of semi-colon separated
520 //                             fields which describe the source file,
521 ///                            the function and a pair of line numbers that
522 ///                            delimit the construct.
523 ///                             */
524 /// } ident_t;
/// Indexes of the fields of the ident_t structure described above, in
/// declaration order.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
539 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// These are high bits intended to be OR'ed with a schedule value above.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
571 
572 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
573 /// region.
574 class CleanupTy final : public EHScopeStack::Cleanup {
575   PrePostActionTy *Action;
576 
577 public:
578   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
579   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
580     if (!CGF.HaveInsertPoint())
581       return;
582     Action->Exit(CGF);
583   }
584 };
585 
586 } // anonymous namespace
587 
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  // Run any cleanups pushed during the region's code generation before
  // returning to the caller.
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    // An action is attached: push a cleanup so its Exit hook runs on every
    // path out of the region (normal and EH), then invoke the callback.
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    // No action attached: pass a default-constructed action to the callback.
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
598 
599 /// Check if the combiner is a call to UDR combiner and if it is so return the
600 /// UDR decl used for reduction.
601 static const OMPDeclareReductionDecl *
602 getReductionInit(const Expr *ReductionOp) {
603   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
604     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
605       if (const auto *DRE =
606               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
607         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
608           return DRD;
609   return nullptr;
610 }
611 
/// Emit initialization of \p Private for a reduction: either via the
/// user-defined reduction initializer of \p DRD (if one is present) or from a
/// zero-initialized constant of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // UDR initializer: map the init expression's LHS/RHS variables onto the
    // private and original storage and emit the initializer call.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Substitute the initializer function for the opaque callee and emit the
    // call.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No UDR initializer: materialize a private-linkage global holding the
    // null constant of Ty and copy its value into Private.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the constant with the representation matching Ty's evaluation
    // kind (scalar / complex / aggregate).
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
663 
/// Emit initialization of arrays of complex types.
/// \param CGF Current function codegen state.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit true to initialize each element through
/// the user-defined reduction initializer, false to emit \p Init as a plain
/// expression.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration used for initialization, if any.
/// \param SrcAddr Address of the original array (walked in lockstep with the
/// destination when \p DRD is given).
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source (if any) and destination elements across
  // loop iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
752 
// Emit the lvalue for the shared (original) copy of a reduction item.
// Thin wrapper over CodeGenFunction::EmitOMPSharedLValue.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
756 
757 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
758                                             const Expr *E) {
759   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
760     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
761   return LValue();
762 }
763 
// Emit element-wise initialization of an array-typed private reduction copy.
// The declare-reduction initializer is used when the reduction op has a DRD
// and either the DRD provides an initializer or the private copy has no
// initializer of its own; otherwise the private variable's initializer is
// emitted per element.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  // The init expression is the reduction op when a DRD initializer applies,
  // and the private variable's own initializer otherwise.
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}
780 
781 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
782                                    ArrayRef<const Expr *> Origs,
783                                    ArrayRef<const Expr *> Privates,
784                                    ArrayRef<const Expr *> ReductionOps) {
785   ClausesData.reserve(Shareds.size());
786   SharedAddresses.reserve(Shareds.size());
787   Sizes.reserve(Shareds.size());
788   BaseDecls.reserve(Shareds.size());
789   const auto *IOrig = Origs.begin();
790   const auto *IPriv = Privates.begin();
791   const auto *IRed = ReductionOps.begin();
792   for (const Expr *Ref : Shareds) {
793     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
794     std::advance(IOrig, 1);
795     std::advance(IPriv, 1);
796     std::advance(IRed, 1);
797   }
798 }
799 
800 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
801   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
802          "Number of generated lvalues must be exactly N.");
803   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
804   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
805   SharedAddresses.emplace_back(First, Second);
806   if (ClausesData[N].Shared == ClausesData[N].Ref) {
807     OrigAddresses.emplace_back(First, Second);
808   } else {
809     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
810     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
811     OrigAddresses.emplace_back(First, Second);
812   }
813 }
814 
// Compute and record the size of reduction item N as the pair
// (size-in-chars, element-count). The element count is non-null only for
// variably modified (VLA) private types, where it is also bound to the VLA
// size expression so the private type can be emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: the type alone determines the byte size; no
    // dynamic element count is required.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: elements = (UB - LB) + 1 via pointer difference of the
    // section bounds; bytes = elements * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole VLA: bytes come from the type size; derive the element count by
    // exact division.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA's size expression to the computed element count so that
  // emitting the variably modified type below picks up the dynamic size.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
851 
// Overload for when the element count of a variably modified (VLA) reduction
// item was computed elsewhere: re-bind the VLA size expression to Size and
// emit the type. For constant-size items Size must be null and this is a
// no-op.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Map the VLA size expression to the provided element count, then emit the
  // type so subsequent address computations see the dynamic size.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
870 
// Emit initialization of the private copy for reduction item N. Order of
// preference: element-wise aggregate init for array types; the
// declare-reduction initializer when a DRD applies; DefaultInit (which
// presumably returns true when it handled the init -- note the '!' below);
// finally the private variable's own non-trivial initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Re-type both addresses to the memory representation of their respective
  // declared types before emitting any initializer.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
901 
902 bool ReductionCodeGen::needCleanups(unsigned N) {
903   const auto *PrivateVD =
904       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
905   QualType PrivateType = PrivateVD->getType();
906   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
907   return DTorKind != QualType::DK_none;
908 }
909 
910 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
911                                     Address PrivateAddr) {
912   const auto *PrivateVD =
913       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
914   QualType PrivateType = PrivateVD->getType();
915   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
916   if (needCleanups(N)) {
917     PrivateAddr = CGF.Builder.CreateElementBitCast(
918         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
919     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
920   }
921 }
922 
// Chase pointer/reference indirections of BaseTy, loading through each
// level, until the type matches ElTy; then return the resulting lvalue
// re-typed to ElTy's memory representation (keeping BaseLV's declared type,
// base info and TBAA).
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference level: load the referenced address.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
942 
// Inverse of loadToBegin: rebuild the chain of indirections so that 'Addr'
// can be reached through the same number of pointer/reference levels as the
// original base. One stack temporary is created per level; each outer
// temporary stores the address of the next inner one, and the innermost
// receives Addr (cast to the matching pointer type). Returns the outermost
// temporary, or Addr itself when BaseTy has no indirections.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // innermost temporary so far
  Address TopTmp = Address::invalid();     // previous temporary to link into
  Address MostTopTmp = Address::invalid(); // outermost temporary (result)
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted address into the innermost temporary and hand back
    // the outermost one.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
970 
971 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
972   const VarDecl *OrigVD = nullptr;
973   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
974     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
975     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
976       Base = TempOASE->getBase()->IgnoreParenImpCasts();
977     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
978       Base = TempASE->getBase()->IgnoreParenImpCasts();
979     DE = cast<DeclRefExpr>(Base);
980     OrigVD = cast<VarDecl>(DE->getDecl());
981   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
982     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
983     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
984       Base = TempASE->getBase()->IgnoreParenImpCasts();
985     DE = cast<DeclRefExpr>(Base);
986     OrigVD = cast<VarDecl>(DE->getDecl());
987   }
988   return OrigVD;
989 }
990 
// For reduction items expressed as array sections/subscripts, shift the
// private copy's address so the original base expression can be reused to
// address the private data: the offset from the reduced item back to its
// base is applied to the private pointer, and the result is wrapped in the
// base's indirection chain via castToBase. Records the base declaration for
// item N; non-subscripted items are returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Element offset from the reduced item to the base; adding it to the
    // private pointer synthesizes a "private base" pointer.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1016 
1017 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1018   const OMPDeclareReductionDecl *DRD =
1019       getReductionInit(ClausesData[N].ReductionOp);
1020   return DRD && DRD->getInitializer();
1021 }
1022 
1023 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1024   return CGF.EmitLoadOfPointerLValue(
1025       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1026       getThreadIDVariable()->getType()->castAs<PointerType>());
1027 }
1028 
// Emit the region's body wrapped in a terminate scope: exceptions must not
// propagate out of an OpenMP structured block.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1041 
1042 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1043     CodeGenFunction &CGF) {
1044   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1045                             getThreadIDVariable()->getType(),
1046                             AlignmentSource::Decl);
1047 }
1048 
1049 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1050                                        QualType FieldTy) {
1051   auto *Field = FieldDecl::Create(
1052       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1053       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1054       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1055   Field->setAccess(AS_public);
1056   DC->addDecl(Field);
1057   return Field;
1058 }
1059 
// Construct the OpenMP runtime helper: builds the implicit 'ident_t' record
// type used for source-location arguments of runtime entry points (layout
// presumably mirrors the libomp runtime's ident_t -- four kmp_int32 fields
// plus a psource pointer), caches its clang/LLVM types, initializes the
// OpenMPIRBuilder types, and loads any offload metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  llvm::omp::types::initializeTypes(CGM.getModule());
  loadOffloadInfoMetadata();
}
1087 
1088 void CGOpenMPRuntime::clear() {
1089   InternalVars.clear();
1090   // Clean non-target variable declarations possibly used only in debug info.
1091   for (const auto &Data : EmittedNonTargetVariables) {
1092     if (!Data.getValue().pointsToAliveValue())
1093       continue;
1094     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1095     if (!GV)
1096       continue;
1097     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1098       continue;
1099     GV->eraseFromParent();
1100   }
1101 }
1102 
1103 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1104   SmallString<128> Buffer;
1105   llvm::raw_svector_ostream OS(Buffer);
1106   StringRef Sep = FirstSeparator;
1107   for (StringRef Part : Parts) {
1108     OS << Sep << Part;
1109     Sep = Separator;
1110   }
1111   return std::string(OS.str());
1112 }
1113 
/// Emit the outlined helper for a 'declare reduction' combiner or
/// initializer. The generated function has signature
/// 'void <name>(Ty * __restrict out, Ty * __restrict in)'; 'In'/'Out' are
/// the declare-reduction variables, privatized onto the two parameters.
/// When IsCombiner is false and CombinerInitializer is null, Out's own
/// (non-trivial) initializer is emitted instead.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are small; make them always-inline in optimized builds.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // Initializer helper without an explicit init expression: emit the output
  // variable's own initializer.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1170 
// Emit (at most once per decl) the combiner and optional initializer
// functions for a 'declare reduction' construct and cache them in UDRMap.
// When emitted during codegen of a function (CGF non-null), the decl is also
// recorded in FunctionUDRMap keyed by that function — presumably for later
// per-function cleanup; see FunctionUDRMap users.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        // Pass the init expression only for call-style initializers; for the
        // other kind, emitCombinerOrInitializer falls back to the priv
        // variable's own initializer.
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1196 
1197 std::pair<llvm::Function *, llvm::Function *>
1198 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1199   auto I = UDRMap.find(D);
1200   if (I != UDRMap.end())
1201     return I->second;
1202   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1203   return UDRMap.lookup(D);
1204 }
1205 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  // Pushes a finalization callback for an OMPD_parallel region onto the
  // OpenMPIRBuilder's finalization stack; a no-op when no builder is in use.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  // Pops the callback pushed by the constructor (if any was pushed).
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder; // non-owning; may be null
};
} // namespace
1251 
// Common implementation for outlining 'parallel' and 'teams' regions: emit a
// function for captured statement CS, with ThreadIDVar as the kmp_int32 *
// thread-id argument. Whether the construct may be cancelled is derived from
// the directive (including every combined variant containing 'parallel') so
// the region info and the OpenMPIRBuilder finalization stack reflect it.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Probe each directive kind that can carry a 'cancel' region.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
  PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
1288 
1289 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1290     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1291     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1292   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1293   return emitParallelOrTeamsOutlinedFunction(
1294       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1295 }
1296 
1297 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1298     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1299     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1300   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1301   return emitParallelOrTeamsOutlinedFunction(
1302       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1303 }
1304 
// Outline the body of a 'task' / task-loop directive. For untied tasks, an
// UntiedTaskActionTy is installed whose codegen re-enqueues the task via
// __kmpc_omp_task; the resulting number of task parts is reported back
// through NumberOfParts in that case.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Codegen run when an untied task yields: __kmpc_omp_task(loc, tid,
  // task_t*), with the task descriptor loaded from TaskTVar.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Determine whether this (possibly combined) task construct may be
  // cancelled.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1351 
1352 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1353                              const RecordDecl *RD, const CGRecordLayout &RL,
1354                              ArrayRef<llvm::Constant *> Data) {
1355   llvm::StructType *StructTy = RL.getLLVMType();
1356   unsigned PrevIdx = 0;
1357   ConstantInitBuilder CIBuilder(CGM);
1358   auto DI = Data.begin();
1359   for (const FieldDecl *FD : RD->fields()) {
1360     unsigned Idx = RL.getLLVMFieldNo(FD);
1361     // Fill the alignment.
1362     for (unsigned I = PrevIdx; I < Idx; ++I)
1363       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1364     PrevIdx = Idx + 1;
1365     Fields.add(*DI);
1366     ++DI;
1367   }
1368 }
1369 
/// Create a global variable whose type is the LLVM struct lowered from
/// record type Ty, with its fields initialized from Data (padding elements
/// zero-filled by buildStructValue). The trailing arguments are forwarded to
/// ConstantStructBuilder::finishAndCreateGlobal (e.g. linkage).
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}
1384 
/// Build a constant struct of the LLVM type lowered from record type Ty,
/// initialized from Data, and append it to an enclosing aggregate builder
/// ('Parent' — any type exposing beginStruct/finishAndAddTo, e.g. a
/// ConstantArrayBuilder or ConstantStructBuilder).
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}
1396 
// Return (creating and caching on first use) the default ident_t global for
// the given flags. One private global is emitted per unique
// (Flags, Reserved2Flags) pair; its psource field points at the shared
// "unknown location" string.
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Field order matches the ident_t record built in the constructor:
    // reserved_1, flags, reserved_2, reserved_3, psource.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}
1429 
1430 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1431                                              bool AtCurrentPoint) {
1432   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1433   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1434 
1435   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1436   if (AtCurrentPoint) {
1437     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1438         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1439   } else {
1440     Elem.second.ServiceInsertPt =
1441         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1442     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1443   }
1444 }
1445 
1446 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1447   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1448   if (Elem.second.ServiceInsertPt) {
1449     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1450     Elem.second.ServiceInsertPt = nullptr;
1451     Ptr->eraseFromParent();
1452   }
1453 }
1454 
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  // Return an ident_t* describing Loc: either the shared default location
  // (when debug info is off or Loc is invalid), or a function-local copy of
  // it whose psource field is updated with ";file;function;line;column;;".
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Copy the default ident_t into the temporary at the service insertion
    // point, so the initialized copy dominates every later use.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Lazily build and cache the location string for Loc, keyed by its raw
  // encoding so repeated locations reuse one global string.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1515 
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  // Return the OpenMP global thread id for the current function.  Sources,
  // in order of preference: a value cached in OpenMPLocThreadIDMap, the
  // thread-id argument of an outlined region, or a fresh call to
  // __kmpc_global_thread_num emitted at the service insertion point.
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only load from the thread-id variable when the load cannot be
      // separated from the variable's definition by a landing pad: either
      // EH is off, or we are emitting into the entry block, or the variable
      // lives in the entry/current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1573 
1574 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1575   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1576   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1577     clearLocThreadIdInsertPt(CGF);
1578     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1579   }
1580   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1581     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1582       UDRMap.erase(D);
1583     FunctionUDRMap.erase(CGF.CurFn);
1584   }
1585   auto I = FunctionUDMMap.find(CGF.CurFn);
1586   if (I != FunctionUDMMap.end()) {
1587     for(const auto *D : I->second)
1588       UDMMap.erase(D);
1589     FunctionUDMMap.erase(I);
1590   }
1591   LastprivateConditionalToTypes.erase(CGF.CurFn);
1592 }
1593 
1594 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1595   return IdentTy->getPointerTo();
1596 }
1597 
1598 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1599   if (!Kmpc_MicroTy) {
1600     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1601     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1602                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1603     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1604   }
1605   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1606 }
1607 
1608 llvm::FunctionCallee
1609 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1610   assert((IVSize == 32 || IVSize == 64) &&
1611          "IV size is not compatible with the omp runtime");
1612   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1613                                             : "__kmpc_for_static_init_4u")
1614                                 : (IVSigned ? "__kmpc_for_static_init_8"
1615                                             : "__kmpc_for_static_init_8u");
1616   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1617   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1618   llvm::Type *TypeParams[] = {
1619     getIdentTyPointerTy(),                     // loc
1620     CGM.Int32Ty,                               // tid
1621     CGM.Int32Ty,                               // schedtype
1622     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1623     PtrTy,                                     // p_lower
1624     PtrTy,                                     // p_upper
1625     PtrTy,                                     // p_stride
1626     ITy,                                       // incr
1627     ITy                                        // chunk
1628   };
1629   auto *FnTy =
1630       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1631   return CGM.CreateRuntimeFunction(FnTy, Name);
1632 }
1633 
1634 llvm::FunctionCallee
1635 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1636   assert((IVSize == 32 || IVSize == 64) &&
1637          "IV size is not compatible with the omp runtime");
1638   StringRef Name =
1639       IVSize == 32
1640           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1641           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1642   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1643   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1644                                CGM.Int32Ty,           // tid
1645                                CGM.Int32Ty,           // schedtype
1646                                ITy,                   // lower
1647                                ITy,                   // upper
1648                                ITy,                   // stride
1649                                ITy                    // chunk
1650   };
1651   auto *FnTy =
1652       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1653   return CGM.CreateRuntimeFunction(FnTy, Name);
1654 }
1655 
1656 llvm::FunctionCallee
1657 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1658   assert((IVSize == 32 || IVSize == 64) &&
1659          "IV size is not compatible with the omp runtime");
1660   StringRef Name =
1661       IVSize == 32
1662           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1663           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1664   llvm::Type *TypeParams[] = {
1665       getIdentTyPointerTy(), // loc
1666       CGM.Int32Ty,           // tid
1667   };
1668   auto *FnTy =
1669       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1670   return CGM.CreateRuntimeFunction(FnTy, Name);
1671 }
1672 
1673 llvm::FunctionCallee
1674 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1675   assert((IVSize == 32 || IVSize == 64) &&
1676          "IV size is not compatible with the omp runtime");
1677   StringRef Name =
1678       IVSize == 32
1679           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1680           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1681   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1682   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1683   llvm::Type *TypeParams[] = {
1684     getIdentTyPointerTy(),                     // loc
1685     CGM.Int32Ty,                               // tid
1686     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1687     PtrTy,                                     // p_lower
1688     PtrTy,                                     // p_upper
1689     PtrTy                                      // p_stride
1690   };
1691   auto *FnTy =
1692       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1693   return CGM.CreateRuntimeFunction(FnTy, Name);
1694 }
1695 
1696 /// Obtain information that uniquely identifies a target entry. This
1697 /// consists of the file and device IDs as well as line number associated with
1698 /// the relevant entry source location.
1699 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1700                                      unsigned &DeviceID, unsigned &FileID,
1701                                      unsigned &LineNum) {
1702   SourceManager &SM = C.getSourceManager();
1703 
1704   // The loc should be always valid and have a file ID (the user cannot use
1705   // #pragma directives in macros)
1706 
1707   assert(Loc.isValid() && "Source location is expected to be always valid.");
1708 
1709   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1710   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1711 
1712   llvm::sys::fs::UniqueID ID;
1713   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1714     SM.getDiagnostics().Report(diag::err_cannot_open_file)
1715         << PLoc.getFilename() << EC.message();
1716 
1717   DeviceID = ID.getDevice();
1718   FileID = ID.getFile();
1719   LineNum = PLoc.getLine();
1720 }
1721 
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // For declare-target 'link' variables — or 'to' variables when unified
  // shared memory is required — return the address of the
  // "..._decl_tgt_ref_ptr" indirection pointer; otherwise return an invalid
  // address.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      // For internal-linkage variables, mix the file's unique ID into the
      // name to keep the reference pointer distinct across TUs.
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // First use: create the pointer-to-VD global lazily.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host, the pointer is initialized with the variable's address;
      // on the device it is filled in by the runtime.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1760 
1761 llvm::Constant *
1762 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1763   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1764          !CGM.getContext().getTargetInfo().isTLSSupported());
1765   // Lookup the entry, lazily creating it if necessary.
1766   std::string Suffix = getName({"cache", ""});
1767   return getOrCreateInternalVariable(
1768       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1769 }
1770 
1771 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1772                                                 const VarDecl *VD,
1773                                                 Address VDAddr,
1774                                                 SourceLocation Loc) {
1775   if (CGM.getLangOpts().OpenMPUseTLS &&
1776       CGM.getContext().getTargetInfo().isTLSSupported())
1777     return VDAddr;
1778 
1779   llvm::Type *VarTy = VDAddr.getElementType();
1780   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1781                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1782                                                        CGM.Int8PtrTy),
1783                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1784                          getOrCreateThreadPrivateCache(VD)};
1785   return Address(CGF.EmitRuntimeCall(
1786                      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
1787                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1788                      Args),
1789                  VDAddr.getAlignment());
1790 }
1791 
1792 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1793     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1794     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1795   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1796   // library.
1797   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1798   CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
1799                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1800                       OMPLoc);
1801   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1802   // to register constructor/destructor for variable.
1803   llvm::Value *Args[] = {
1804       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1805       Ctor, CopyCtor, Dtor};
1806   CGF.EmitRuntimeCall(
1807       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
1808           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1809       Args);
1810 }
1811 
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Emit, at most once per variable, the ctor/dtor helper functions for a
  // threadprivate variable and register them with the runtime.  Returns the
  // synthesized init function when emitting outside any function (CGF ==
  // nullptr) and registration was required; returns nullptr otherwise.
  //
  // With native TLS support the runtime registration machinery is not used.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // ThreadPrivateWithDefinition guards against emitting the helpers twice
  // for the same mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD.  Signature:
      //   void *__kmpc_global_ctor_(void *dst) — returns dst.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer, retype it to the variable's type, and
      // run the initializer into it.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the destination pointer, as the runtime expects.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD.  Signature:
      //   void __kmpc_global_dtor_(void *obj).
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing ctor/dtor are passed to the runtime as typed null pointers.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // Emitting at file scope: wrap the registration in a dedicated global
      // init function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1931 
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Emit and register offload ctor/dtor entries for a declare-target 'to'
  // variable.  Returns whether initialization of the original variable should
  // be suppressed (true when compiling for the device).
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables (and 'to' under unified shared memory) are handled via
  // the reference-pointer mechanism, not ctor/dtor entries.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the entries only once per mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive: it is only referenced from offload metadata.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host, only a placeholder global is needed so the entry has a
      // unique address to identify it.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive: it is only referenced from offload metadata.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder global acting as the entry's unique address.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2046 
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  // Return the address of a compiler-generated ("artificial") threadprivate
  // variable named "<Name><artificial-suffix>", backed either by native TLS
  // or by __kmpc_threadprivate_cached.
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // With native TLS support, simply mark the global thread-local.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise ask the runtime for this thread's copy, using an additional
  // "<Name><artificial-suffix><cache-suffix>" global as the runtime cache.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2077 
2078 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2079                                    const RegionCodeGenTy &ThenGen,
2080                                    const RegionCodeGenTy &ElseGen) {
2081   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2082 
2083   // If the condition constant folds and can be elided, try to avoid emitting
2084   // the condition and the dead arm of the if/else.
2085   bool CondConstant;
2086   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2087     if (CondConstant)
2088       ThenGen(CGF);
2089     else
2090       ElseGen(CGF);
2091     return;
2092   }
2093 
2094   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2095   // emit the conditional branch.
2096   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2097   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2098   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2099   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2100 
2101   // Emit the 'then' code.
2102   CGF.EmitBlock(ThenBlock);
2103   ThenGen(CGF);
2104   CGF.EmitBranch(ContBlock);
2105   // Emit the 'else' code if present.
2106   // There is no need to emit line number for unconditional branch.
2107   (void)ApplyDebugLocation::CreateEmpty(CGF);
2108   CGF.EmitBlock(ElseBlock);
2109   ElseGen(CGF);
2110   // There is no need to emit line number for unconditional branch.
2111   (void)ApplyDebugLocation::CreateEmpty(CGF);
2112   CGF.EmitBranch(ContBlock);
2113   // Emit the continuation block for code after the if.
2114   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2115 }
2116 
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  // Emits the runtime calls launching a 'parallel' region whose body was
  // outlined into \p OutlinedFn. With an if-clause, a runtime branch selects
  // between a real fork (then) and a serialized execution (else).
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Then-arm: fork worker threads through the runtime.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    // Captured variables are forwarded as trailing variadic arguments.
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Else-arm: run the outlined body on the current thread, bracketed by the
  // serialized-parallel runtime calls.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // No if-clause: the fork path is unconditional.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2179 
// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  // Inside an outlined OpenMP region the thread ID already lives in memory;
  // reuse that address instead of materializing a new temporary.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  // Serial code: fetch the thread ID and spill it to a temporary so callers
  // get an address.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}
2202 
2203 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2204     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2205   SmallString<256> Buffer;
2206   llvm::raw_svector_ostream Out(Buffer);
2207   Out << Name;
2208   StringRef RuntimeName = Out.str();
2209   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2210   if (Elem.second) {
2211     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2212            "OMP internal variable has different type than requested");
2213     return &*Elem.second;
2214   }
2215 
2216   return Elem.second = new llvm::GlobalVariable(
2217              CGM.getModule(), Ty, /*IsConstant*/ false,
2218              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2219              Elem.first(), /*InsertBefore=*/nullptr,
2220              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2221 }
2222 
2223 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2224   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2225   std::string Name = getName({Prefix, "var"});
2226   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2227 }
2228 
2229 namespace {
2230 /// Common pre(post)-action for different OpenMP constructs.
2231 class CommonActionTy final : public PrePostActionTy {
2232   llvm::FunctionCallee EnterCallee;
2233   ArrayRef<llvm::Value *> EnterArgs;
2234   llvm::FunctionCallee ExitCallee;
2235   ArrayRef<llvm::Value *> ExitArgs;
2236   bool Conditional;
2237   llvm::BasicBlock *ContBlock = nullptr;
2238 
2239 public:
2240   CommonActionTy(llvm::FunctionCallee EnterCallee,
2241                  ArrayRef<llvm::Value *> EnterArgs,
2242                  llvm::FunctionCallee ExitCallee,
2243                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2244       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2245         ExitArgs(ExitArgs), Conditional(Conditional) {}
2246   void Enter(CodeGenFunction &CGF) override {
2247     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2248     if (Conditional) {
2249       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2250       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2251       ContBlock = CGF.createBasicBlock("omp_if.end");
2252       // Generate the branch (If-stmt)
2253       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2254       CGF.EmitBlock(ThenBlock);
2255     }
2256   }
2257   void Done(CodeGenFunction &CGF) {
2258     // Emit the rest of blocks/branches
2259     CGF.EmitBranch(ContBlock);
2260     CGF.EmitBlock(ContBlock, true);
2261   }
2262   void Exit(CodeGenFunction &CGF) override {
2263     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2264   }
2265 };
2266 } // anonymous namespace
2267 
2268 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2269                                          StringRef CriticalName,
2270                                          const RegionCodeGenTy &CriticalOpGen,
2271                                          SourceLocation Loc, const Expr *Hint) {
2272   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2273   // CriticalOpGen();
2274   // __kmpc_end_critical(ident_t *, gtid, Lock);
2275   // Prepare arguments and build a call to __kmpc_critical
2276   if (!CGF.HaveInsertPoint())
2277     return;
2278   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2279                          getCriticalRegionLock(CriticalName)};
2280   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2281                                                 std::end(Args));
2282   if (Hint) {
2283     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2284         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2285   }
2286   CommonActionTy Action(
2287       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2288           CGM.getModule(),
2289           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2290       EnterArgs,
2291       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2292           CGM.getModule(), OMPRTL___kmpc_end_critical),
2293       Args);
2294   CriticalOpGen.setAction(Action);
2295   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2296 }
2297 
2298 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2299                                        const RegionCodeGenTy &MasterOpGen,
2300                                        SourceLocation Loc) {
2301   if (!CGF.HaveInsertPoint())
2302     return;
2303   // if(__kmpc_master(ident_t *, gtid)) {
2304   //   MasterOpGen();
2305   //   __kmpc_end_master(ident_t *, gtid);
2306   // }
2307   // Prepare arguments and build a call to __kmpc_master
2308   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2309   CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2310                             CGM.getModule(), OMPRTL___kmpc_master),
2311                         Args,
2312                         llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2313                             CGM.getModule(), OMPRTL___kmpc_end_master),
2314                         Args,
2315                         /*Conditional=*/true);
2316   MasterOpGen.setAction(Action);
2317   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2318   Action.Done(CGF);
2319 }
2320 
2321 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2322                                         SourceLocation Loc) {
2323   if (!CGF.HaveInsertPoint())
2324     return;
2325   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
2326   if (OMPBuilder) {
2327     OMPBuilder->CreateTaskyield(CGF.Builder);
2328   } else {
2329     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2330     llvm::Value *Args[] = {
2331         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2332         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2333     CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2334                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2335                         Args);
2336   }
2337 
2338   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2339     Region->emitUntiedSwitch(CGF);
2340 }
2341 
2342 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2343                                           const RegionCodeGenTy &TaskgroupOpGen,
2344                                           SourceLocation Loc) {
2345   if (!CGF.HaveInsertPoint())
2346     return;
2347   // __kmpc_taskgroup(ident_t *, gtid);
2348   // TaskgroupOpGen();
2349   // __kmpc_end_taskgroup(ident_t *, gtid);
2350   // Prepare arguments and build a call to __kmpc_taskgroup
2351   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2352   CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2353                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2354                         Args,
2355                         llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2356                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2357                         Args);
2358   TaskgroupOpGen.setAction(Action);
2359   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2360 }
2361 
2362 /// Given an array of pointers to variables, project the address of a
2363 /// given variable.
2364 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2365                                       unsigned Index, const VarDecl *Var) {
2366   // Pull out the pointer to the variable.
2367   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2368   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2369 
2370   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2371   Addr = CGF.Builder.CreateElementBitCast(
2372       Addr, CGF.ConvertTypeForMem(Var->getType()));
2373   return Addr;
2374 }
2375 
/// Emits an internal helper "void copy_func(void *LHSArg, void *RHSArg)" that
/// copies every copyprivate variable from the RHS pointer array into the LHS
/// pointer array, using the precomputed assignment expressions. Both arguments
/// are arrays of 'void *' (one slot per variable) cast to \p ArgsType.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // Emit the helper's body with a fresh CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    // Project out the I-th source and destination addresses, typed per the
    // corresponding variable declarations.
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // Perform the copy using the assignment expression prebuilt by Sema.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2429 
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four copyprivate arrays are parallel: one entry per variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // 'did_it' records (per thread) whether this thread executed the single
  // region; the runtime uses it to pick the copy source. Only needed when
  // there are copyprivate clauses.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Note: still inside the conditional region, so only the thread that ran
    // the single body executes this store.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    // Build a local array of 'void *', one slot per copyprivate variable.
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2517 
2518 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2519                                         const RegionCodeGenTy &OrderedOpGen,
2520                                         SourceLocation Loc, bool IsThreads) {
2521   if (!CGF.HaveInsertPoint())
2522     return;
2523   // __kmpc_ordered(ident_t *, gtid);
2524   // OrderedOpGen();
2525   // __kmpc_end_ordered(ident_t *, gtid);
2526   // Prepare arguments and build a call to __kmpc_ordered
2527   if (IsThreads) {
2528     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2529     CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2530                               CGM.getModule(), OMPRTL___kmpc_ordered),
2531                           Args,
2532                           llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2533                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2534                           Args);
2535     OrderedOpGen.setAction(Action);
2536     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2537     return;
2538   }
2539   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2540 }
2541 
2542 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2543   unsigned Flags;
2544   if (Kind == OMPD_for)
2545     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2546   else if (Kind == OMPD_sections)
2547     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2548   else if (Kind == OMPD_single)
2549     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2550   else if (Kind == OMPD_barrier)
2551     Flags = OMP_IDENT_BARRIER_EXPL;
2552   else
2553     Flags = OMP_IDENT_BARRIER_IMPL;
2554   return Flags;
2555 }
2556 
2557 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2558     CodeGenFunction &CGF, const OMPLoopDirective &S,
2559     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2560   // Check if the loop directive is actually a doacross loop directive. In this
2561   // case choose static, 1 schedule.
2562   if (llvm::any_of(
2563           S.getClausesOfKind<OMPOrderedClause>(),
2564           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2565     ScheduleKind = OMPC_SCHEDULE_static;
2566     // Chunk size is 1 in this case.
2567     llvm::APInt ChunkSize(32, 1);
2568     ChunkExpr = IntegerLiteral::Create(
2569         CGF.getContext(), ChunkSize,
2570         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2571         SourceLocation());
2572   }
2573 }
2574 
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Emits a barrier for directive \p Kind. Inside a cancellable region (and
  // unless \p ForceSimpleCall) the cancellation-aware barrier is used; with
  // \p EmitChecks its result additionally triggers an exit from the construct.
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      // Cancellation-aware barrier: its non-zero result means the region was
      // cancelled.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Plain barrier: no cancellation handling required.
  CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2625 
2626 /// Map the OpenMP loop schedule to the runtime enumeration.
2627 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2628                                           bool Chunked, bool Ordered) {
2629   switch (ScheduleKind) {
2630   case OMPC_SCHEDULE_static:
2631     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2632                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2633   case OMPC_SCHEDULE_dynamic:
2634     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2635   case OMPC_SCHEDULE_guided:
2636     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2637   case OMPC_SCHEDULE_runtime:
2638     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2639   case OMPC_SCHEDULE_auto:
2640     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2641   case OMPC_SCHEDULE_unknown:
2642     assert(!Chunked && "chunk was specified but schedule kind not known");
2643     return Ordered ? OMP_ord_static : OMP_sch_static;
2644   }
2645   llvm_unreachable("Unexpected runtime schedule");
2646 }
2647 
2648 /// Map the OpenMP distribute schedule to the runtime enumeration.
2649 static OpenMPSchedType
2650 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2651   // only static is allowed for dist_schedule
2652   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2653 }
2654 
2655 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2656                                          bool Chunked) const {
2657   OpenMPSchedType Schedule =
2658       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2659   return Schedule == OMP_sch_static;
2660 }
2661 
2662 bool CGOpenMPRuntime::isStaticNonchunked(
2663     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2664   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2665   return Schedule == OMP_dist_sch_static;
2666 }
2667 
2668 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2669                                       bool Chunked) const {
2670   OpenMPSchedType Schedule =
2671       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2672   return Schedule == OMP_sch_static_chunked;
2673 }
2674 
2675 bool CGOpenMPRuntime::isStaticChunked(
2676     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2677   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2678   return Schedule == OMP_dist_sch_static_chunked;
2679 }
2680 
2681 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2682   OpenMPSchedType Schedule =
2683       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2684   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2685   return Schedule != OMP_sch_static;
2686 }
2687 
2688 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2689                                   OpenMPScheduleClauseModifier M1,
2690                                   OpenMPScheduleClauseModifier M2) {
2691   int Modifier = 0;
2692   switch (M1) {
2693   case OMPC_SCHEDULE_MODIFIER_monotonic:
2694     Modifier = OMP_sch_modifier_monotonic;
2695     break;
2696   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2697     Modifier = OMP_sch_modifier_nonmonotonic;
2698     break;
2699   case OMPC_SCHEDULE_MODIFIER_simd:
2700     if (Schedule == OMP_sch_static_chunked)
2701       Schedule = OMP_sch_static_balanced_chunked;
2702     break;
2703   case OMPC_SCHEDULE_MODIFIER_last:
2704   case OMPC_SCHEDULE_MODIFIER_unknown:
2705     break;
2706   }
2707   switch (M2) {
2708   case OMPC_SCHEDULE_MODIFIER_monotonic:
2709     Modifier = OMP_sch_modifier_monotonic;
2710     break;
2711   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2712     Modifier = OMP_sch_modifier_nonmonotonic;
2713     break;
2714   case OMPC_SCHEDULE_MODIFIER_simd:
2715     if (Schedule == OMP_sch_static_chunked)
2716       Schedule = OMP_sch_static_balanced_chunked;
2717     break;
2718   case OMPC_SCHEDULE_MODIFIER_last:
2719   case OMPC_SCHEDULE_MODIFIER_unknown:
2720     break;
2721   }
2722   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2723   // If the static schedule kind is specified or if the ordered clause is
2724   // specified, and if the nonmonotonic modifier is not specified, the effect is
2725   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2726   // modifier is specified, the effect is as if the nonmonotonic modifier is
2727   // specified.
2728   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2729     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2730           Schedule == OMP_sch_static_balanced_chunked ||
2731           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2732           Schedule == OMP_dist_sch_static_chunked ||
2733           Schedule == OMP_dist_sch_static))
2734       Modifier = OMP_sch_modifier_nonmonotonic;
2735   }
2736   return Schedule | Modifier;
2737 }
2738 
/// Emits the runtime call that initializes dynamic (dispatch-based) loop
/// scheduling for a worksharing loop with the given bounds and chunk.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  // Nothing to emit when the current insertion point is unreachable.
  if (!CGF.HaveInsertPoint())
    return;
  // Translate the schedule clause (kind + chunk presence + ordered) into the
  // runtime's schedule encoding.
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules only reach the dispatch-init path when 'ordered' forces
  // dynamic dispatch.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
2771 
/// Emits the __kmpc_for_static_init call that sets up a statically scheduled
/// worksharing region, using the already-computed runtime schedule encoding
/// and the pointers to the loop control variables in \p Values.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Ordered loops must not reach this static-init path.
  assert(!Values.Ordered);
  // Only the static (and distribute-static) schedule kinds are valid here.
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2820 
2821 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2822                                         SourceLocation Loc,
2823                                         OpenMPDirectiveKind DKind,
2824                                         const OpenMPScheduleTy &ScheduleKind,
2825                                         const StaticRTInput &Values) {
2826   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2827       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2828   assert(isOpenMPWorksharingDirective(DKind) &&
2829          "Expected loop-based or sections-based directive.");
2830   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2831                                              isOpenMPLoopDirective(DKind)
2832                                                  ? OMP_IDENT_WORK_LOOP
2833                                                  : OMP_IDENT_WORK_SECTIONS);
2834   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2835   llvm::FunctionCallee StaticInitFunction =
2836       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2837   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2838   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2839                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2840 }
2841 
2842 void CGOpenMPRuntime::emitDistributeStaticInit(
2843     CodeGenFunction &CGF, SourceLocation Loc,
2844     OpenMPDistScheduleClauseKind SchedKind,
2845     const CGOpenMPRuntime::StaticRTInput &Values) {
2846   OpenMPSchedType ScheduleNum =
2847       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2848   llvm::Value *UpdatedLocation =
2849       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2850   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2851   llvm::FunctionCallee StaticInitFunction =
2852       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2853   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2854                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2855                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2856 }
2857 
2858 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2859                                           SourceLocation Loc,
2860                                           OpenMPDirectiveKind DKind) {
2861   if (!CGF.HaveInsertPoint())
2862     return;
2863   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2864   llvm::Value *Args[] = {
2865       emitUpdateLocation(CGF, Loc,
2866                          isOpenMPDistributeDirective(DKind)
2867                              ? OMP_IDENT_WORK_DISTRIBUTE
2868                              : isOpenMPLoopDirective(DKind)
2869                                    ? OMP_IDENT_WORK_LOOP
2870                                    : OMP_IDENT_WORK_SECTIONS),
2871       getThreadID(CGF, Loc)};
2872   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2873   CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2874                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2875                       Args);
2876 }
2877 
2878 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2879                                                  SourceLocation Loc,
2880                                                  unsigned IVSize,
2881                                                  bool IVSigned) {
2882   if (!CGF.HaveInsertPoint())
2883     return;
2884   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2885   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2886   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2887 }
2888 
2889 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2890                                           SourceLocation Loc, unsigned IVSize,
2891                                           bool IVSigned, Address IL,
2892                                           Address LB, Address UB,
2893                                           Address ST) {
2894   // Call __kmpc_dispatch_next(
2895   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2896   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2897   //          kmp_int[32|64] *p_stride);
2898   llvm::Value *Args[] = {
2899       emitUpdateLocation(CGF, Loc),
2900       getThreadID(CGF, Loc),
2901       IL.getPointer(), // &isLastIter
2902       LB.getPointer(), // &Lower
2903       UB.getPointer(), // &Upper
2904       ST.getPointer()  // &Stride
2905   };
2906   llvm::Value *Call =
2907       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2908   return CGF.EmitScalarConversion(
2909       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2910       CGF.getContext().BoolTy, Loc);
2911 }
2912 
2913 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2914                                            llvm::Value *NumThreads,
2915                                            SourceLocation Loc) {
2916   if (!CGF.HaveInsertPoint())
2917     return;
2918   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2919   llvm::Value *Args[] = {
2920       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2921       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2922   CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2923                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2924                       Args);
2925 }
2926 
2927 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2928                                          ProcBindKind ProcBind,
2929                                          SourceLocation Loc) {
2930   if (!CGF.HaveInsertPoint())
2931     return;
2932   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2933   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2934   llvm::Value *Args[] = {
2935       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2936       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2937   CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2938                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2939                       Args);
2940 }
2941 
2942 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2943                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2944   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
2945   if (OMPBuilder) {
2946     OMPBuilder->CreateFlush(CGF.Builder);
2947   } else {
2948     if (!CGF.HaveInsertPoint())
2949       return;
2950     // Build call void __kmpc_flush(ident_t *loc)
2951     CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2952                             CGM.getModule(), OMPRTL___kmpc_flush),
2953                         emitUpdateLocation(CGF, Loc));
2954   }
2955 }
2956 
namespace {
/// Indexes of fields for type kmp_task_t. The order here must match the
/// field order of the record built for kmp_task_t; entries past the priority
/// field are only present for taskloop constructs.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2982 
2983 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2984   return OffloadEntriesTargetRegion.empty() &&
2985          OffloadEntriesDeviceGlobalVar.empty();
2986 }
2987 
/// Initialize target region entry.
/// Records a placeholder entry (address and ID are filled in later by
/// registerTargetRegionEntryInfo) keyed by the unique
/// device/file/parent-function/line tuple, and bumps the entry count.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}
3001 
/// Registers the address/ID/flags for a target region entry. On the device
/// the entry must already have been initialized from the host IR metadata;
/// on the host a fresh entry is created.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // A missing entry means the host metadata did not describe this region;
    // report an error rather than silently creating a mismatched entry.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host compilation: create and record a brand-new entry in order.
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3029 
3030 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3031     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3032     unsigned LineNum) const {
3033   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3034   if (PerDevice == OffloadEntriesTargetRegion.end())
3035     return false;
3036   auto PerFile = PerDevice->second.find(FileID);
3037   if (PerFile == PerDevice->second.end())
3038     return false;
3039   auto PerParentName = PerFile->second.find(ParentName);
3040   if (PerParentName == PerFile->second.end())
3041     return false;
3042   auto PerLine = PerParentName->second.find(LineNum);
3043   if (PerLine == PerParentName->second.end())
3044     return false;
3045   // Fail if this entry is already registered.
3046   if (PerLine->second.getAddress() || PerLine->second.getID())
3047     return false;
3048   return true;
3049 }
3050 
/// Invokes \p Action on every recorded target region entry, passing the
/// device ID, file ID, parent function name, line number and entry info.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  // Note: P.first() (with parentheses) extracts the parent-name key of the
  // per-function string map at the third nesting level.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}
3060 
/// Initializes a device global variable entry (address and size are filled
/// in later by registerDeviceGlobalVarEntryInfo) keyed by its mangled name.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // try_emplace keeps any existing entry for this name untouched.
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3071 
/// Registers address/size/flags/linkage for a device global variable entry.
/// On the device the entry was pre-initialized from host metadata; on the
/// host a new entry is created unless one already exists for this name.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    // Already-registered entry: only fill in a size/linkage that was still
    // unknown, then bail out without touching the address.
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    // Host compilation: if an entry already exists, update missing size and
    // linkage information only; otherwise create a fresh entry in order.
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3111 
3112 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3113     actOnDeviceGlobalVarEntriesInfo(
3114         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3115   // Scan all target region entries and perform the provided action.
3116   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3117     Action(E.getKey(), E.getValue());
3118 }
3119 
3120 void CGOpenMPRuntime::createOffloadEntry(
3121     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3122     llvm::GlobalValue::LinkageTypes Linkage) {
3123   StringRef Name = Addr->getName();
3124   llvm::Module &M = CGM.getModule();
3125   llvm::LLVMContext &C = M.getContext();
3126 
3127   // Create constant string with the name.
3128   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3129 
3130   std::string StringName = getName({"omp_offloading", "entry_name"});
3131   auto *Str = new llvm::GlobalVariable(
3132       M, StrPtrInit->getType(), /*isConstant=*/true,
3133       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3134   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3135 
3136   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
3137                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
3138                             llvm::ConstantInt::get(CGM.SizeTy, Size),
3139                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3140                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3141   std::string EntryName = getName({"omp_offloading", "entry", ""});
3142   llvm::GlobalVariable *Entry = createGlobalStruct(
3143       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3144       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3145 
3146   // The entry has to be created in the section the linker expects it to be.
3147   Entry->setSection("omp_offloading_entries");
3148 }
3149 
/// Emits the offloading entry globals for all recorded entries and, for the
/// host side, the "omp_offload.info" named metadata that the device
/// compilation later reads back via loadOffloadInfoMetadata().
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are gathered indexed by their creation order so the output is
  // deterministic; each slot holds (entry, location, name).
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a SourceLocation for diagnostics by matching the recorded
        // device/file IDs against the source manager's known files.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Second pass: emit the actual __tgt_offload_entry globals, diagnosing
  // entries that were never completed with a valid address/ID.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3323 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  // Only device compilations consume the host IR file.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host bitcode into a throwaway context; only the named metadata
  // is read from the resulting module.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers decoding the operands of one metadata node; the operand layout
    // mirrors the one produced in createOffloadEntriesAndInfoMetadata().
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operands depend on it.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3392 
3393 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3394   if (!KmpRoutineEntryPtrTy) {
3395     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3396     ASTContext &C = CGM.getContext();
3397     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3398     FunctionProtoType::ExtProtoInfo EPI;
3399     KmpRoutineEntryPtrQTy = C.getPointerType(
3400         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3401     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3402   }
3403 }
3404 
3405 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3406   // Make sure the type of the entry is already created. This is the type we
3407   // have to create:
3408   // struct __tgt_offload_entry{
3409   //   void      *addr;       // Pointer to the offload entry info.
3410   //                          // (function or global)
3411   //   char      *name;       // Name of the function or global.
3412   //   size_t     size;       // Size of the entry info (0 if it a function).
3413   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3414   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3415   // };
3416   if (TgtOffloadEntryQTy.isNull()) {
3417     ASTContext &C = CGM.getContext();
3418     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3419     RD->startDefinition();
3420     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3421     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3422     addFieldToRecordDecl(C, RD, C.getSizeType());
3423     addFieldToRecordDecl(
3424         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3425     addFieldToRecordDecl(
3426         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3427     RD->completeDefinition();
3428     RD->addAttr(PackedAttr::CreateImplicit(C));
3429     TgtOffloadEntryQTy = C.getRecordType(RD);
3430   }
3431   return TgtOffloadEntryQTy;
3432 }
3433 
3434 namespace {
3435 struct PrivateHelpersTy {
3436   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3437                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3438       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3439         PrivateElemInit(PrivateElemInit) {}
3440   const Expr *OriginalRef = nullptr;
3441   const VarDecl *Original = nullptr;
3442   const VarDecl *PrivateCopy = nullptr;
3443   const VarDecl *PrivateElemInit = nullptr;
3444 };
3445 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3446 } // anonymous namespace
3447 
3448 static RecordDecl *
3449 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3450   if (!Privates.empty()) {
3451     ASTContext &C = CGM.getContext();
3452     // Build struct .kmp_privates_t. {
3453     //         /*  private vars  */
3454     //       };
3455     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3456     RD->startDefinition();
3457     for (const auto &Pair : Privates) {
3458       const VarDecl *VD = Pair.second.Original;
3459       QualType Type = VD->getType().getNonReferenceType();
3460       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3461       if (VD->hasAttrs()) {
3462         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3463              E(VD->getAttrs().end());
3464              I != E; ++I)
3465           FD->addAttr(*I);
3466       }
3467     }
3468     RD->completeDefinition();
3469     return RD;
3470   }
3471   return nullptr;
3472 }
3473 
3474 static RecordDecl *
3475 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3476                          QualType KmpInt32Ty,
3477                          QualType KmpRoutineEntryPointerQTy) {
3478   ASTContext &C = CGM.getContext();
3479   // Build struct kmp_task_t {
3480   //         void *              shareds;
3481   //         kmp_routine_entry_t routine;
3482   //         kmp_int32           part_id;
3483   //         kmp_cmplrdata_t data1;
3484   //         kmp_cmplrdata_t data2;
3485   // For taskloops additional fields:
3486   //         kmp_uint64          lb;
3487   //         kmp_uint64          ub;
3488   //         kmp_int64           st;
3489   //         kmp_int32           liter;
3490   //         void *              reductions;
3491   //       };
3492   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3493   UD->startDefinition();
3494   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3495   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3496   UD->completeDefinition();
3497   QualType KmpCmplrdataTy = C.getRecordType(UD);
3498   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3499   RD->startDefinition();
3500   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3501   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3502   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3503   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3504   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3505   if (isOpenMPTaskLoopDirective(Kind)) {
3506     QualType KmpUInt64Ty =
3507         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3508     QualType KmpInt64Ty =
3509         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3510     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3511     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3512     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3513     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3514     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3515   }
3516   RD->completeDefinition();
3517   return RD;
3518 }
3519 
3520 static RecordDecl *
3521 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3522                                      ArrayRef<PrivateDataTy> Privates) {
3523   ASTContext &C = CGM.getContext();
3524   // Build struct kmp_task_t_with_privates {
3525   //         kmp_task_t task_data;
3526   //         .kmp_privates_t. privates;
3527   //       };
3528   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3529   RD->startDefinition();
3530   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3531   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3532     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3533   RD->completeDefinition();
3534   return RD;
3535 }
3536 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Build the signature kmp_int32(kmp_int32 gtid, kmp_task_t_with_privates *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  // The proxy gets internal linkage; its name is derived from
  // "omp_task_entry".
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference the second argument to get the task descriptor base.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Base = the embedded kmp_task_t (first field of the wrapper record).
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  // Note: the *address* of part_id is passed, not its value.
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the type the task function expects.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // If a privates record exists (second field of the wrapper), pass its
  // address as i8*; otherwise pass a null pointer.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloop entries additionally receive the loaded lb/ub/st/liter/reductions
  // values from the task descriptor.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  // shareds is always the last argument.
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3651 
/// Emit a function that runs the destructors for every field of the privates
/// record that has a non-trivial destruction kind. The signature matches the
/// task entry: kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *tt).
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  // Internal-linkage function named from "omp_task_destructor".
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Dereference the task argument and navigate to the privates record
  // (second field of kmp_task_t_with_privates).
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for each field whose type needs destruction;
  // the cleanups are emitted when the function is finished.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3700 
3701 /// Emit a privates mapping function for correct handling of private and
3702 /// firstprivate variables.
3703 /// \code
3704 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3705 /// **noalias priv1,...,  <tyn> **noalias privn) {
3706 ///   *priv1 = &.privates.priv1;
3707 ///   ...;
3708 ///   *privn = &.privates.privn;
3709 /// }
3710 /// \endcode
3711 static llvm::Value *
3712 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3713                                ArrayRef<const Expr *> PrivateVars,
3714                                ArrayRef<const Expr *> FirstprivateVars,
3715                                ArrayRef<const Expr *> LastprivateVars,
3716                                QualType PrivatesQTy,
3717                                ArrayRef<PrivateDataTy> Privates) {
3718   ASTContext &C = CGM.getContext();
3719   FunctionArgList Args;
3720   ImplicitParamDecl TaskPrivatesArg(
3721       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3722       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3723       ImplicitParamDecl::Other);
3724   Args.push_back(&TaskPrivatesArg);
3725   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3726   unsigned Counter = 1;
3727   for (const Expr *E : PrivateVars) {
3728     Args.push_back(ImplicitParamDecl::Create(
3729         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3730         C.getPointerType(C.getPointerType(E->getType()))
3731             .withConst()
3732             .withRestrict(),
3733         ImplicitParamDecl::Other));
3734     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3735     PrivateVarsPos[VD] = Counter;
3736     ++Counter;
3737   }
3738   for (const Expr *E : FirstprivateVars) {
3739     Args.push_back(ImplicitParamDecl::Create(
3740         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3741         C.getPointerType(C.getPointerType(E->getType()))
3742             .withConst()
3743             .withRestrict(),
3744         ImplicitParamDecl::Other));
3745     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3746     PrivateVarsPos[VD] = Counter;
3747     ++Counter;
3748   }
3749   for (const Expr *E : LastprivateVars) {
3750     Args.push_back(ImplicitParamDecl::Create(
3751         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3752         C.getPointerType(C.getPointerType(E->getType()))
3753             .withConst()
3754             .withRestrict(),
3755         ImplicitParamDecl::Other));
3756     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3757     PrivateVarsPos[VD] = Counter;
3758     ++Counter;
3759   }
3760   const auto &TaskPrivatesMapFnInfo =
3761       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3762   llvm::FunctionType *TaskPrivatesMapTy =
3763       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3764   std::string Name =
3765       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3766   auto *TaskPrivatesMap = llvm::Function::Create(
3767       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3768       &CGM.getModule());
3769   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3770                                     TaskPrivatesMapFnInfo);
3771   if (CGM.getLangOpts().Optimize) {
3772     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3773     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3774     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3775   }
3776   CodeGenFunction CGF(CGM);
3777   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3778                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3779 
3780   // *privi = &.privates.privi;
3781   LValue Base = CGF.EmitLoadOfPointerLValue(
3782       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3783       TaskPrivatesArg.getType()->castAs<PointerType>());
3784   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3785   Counter = 0;
3786   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3787     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3788     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3789     LValue RefLVal =
3790         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3791     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3792         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3793     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3794     ++Counter;
3795   }
3796   CGF.FinishFunction();
3797   return TaskPrivatesMap;
3798 }
3799 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the shareds block (may be invalid).
/// \param TDBase Base lvalue of the kmp_task_t_with_privates object.
/// \param ForDup true when emitting inside the task_dup function, false when
/// emitting at the task creation point.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    // SrcBase views the shareds block as a SharedsTy record; initial values
    // are read from it in the ForDup branch below.
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the privates-record fields in lockstep with the Privates array.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Inside task_dup (ForDup) only non-trivial C++ constructions are re-run;
    // everything else keeps the values already in the duplicated task.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the shared value from the source task's shareds block,
          // rebuilt with the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Variable captured by a lambda or a block: emit through the
          // original reference expression directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array case: bind Elem to the shared value and run the
          // initializer expression for the private copy.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No element-init helper: just run the private copy's initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3916 
3917 /// Check if duplication function is required for taskloops.
3918 static bool checkInitIsRequired(CodeGenFunction &CGF,
3919                                 ArrayRef<PrivateDataTy> Privates) {
3920   bool InitRequired = false;
3921   for (const PrivateDataTy &Pair : Privates) {
3922     const VarDecl *VD = Pair.second.PrivateCopy;
3923     const Expr *Init = VD->getAnyInitializer();
3924     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3925                                     !CGF.isTrivialInitializer(Init));
3926     if (InitRequired)
3927       break;
3928   }
3929   return InitRequired;
3930 }
3931 
3932 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Build the signature: void(task *dst, task *src, int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // Base of the *destination* task object.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Note: this TDBase/Base refer to the *source* task, shadowing the
    // destination TDBase above; firstprivate values are copied from the
    // source task's shareds.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  // Initialize privates of the destination task from the source shareds.
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4011 
4012 /// Checks if destructor function is required to be generated.
4013 /// \return true if cleanups are required, false otherwise.
4014 static bool
4015 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4016   bool NeedsCleanup = false;
4017   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4018   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4019   for (const FieldDecl *FD : PrivateRD->fields()) {
4020     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4021     if (NeedsCleanup)
4022       break;
4023   }
4024   return NeedsCleanup;
4025 }
4026 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// On construction, privatizes the iterator variables and their counters and
/// emits the headers of one loop per iterator (counter = 0, bound check,
/// iterator update), leaving the IR insertion point inside the innermost
/// loop body. On destruction, emits the matching counter increments,
/// back-branches and exit blocks in reverse order, closing the loop nest.
/// A null iterator expression makes the scope a no-op.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  // Iterator expression driving the loop nest; may be null (no-op scope).
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continuation (loop-header) and exit destinations, filled by
  // the constructor and consumed by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Emit the upper bounds before privatization so they are evaluated in
      // terms of the original (non-private) values.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      // Private copy for the iterator variable itself.
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      // Private copy for the helper counter that drives the loop.
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Emit the loop headers, outermost first; the insertion point ends up in
    // the innermost body.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Pick signed/unsigned comparison to match the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first, mirroring the constructor.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4105 
4106 static std::pair<llvm::Value *, llvm::Value *>
4107 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4108   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4109   llvm::Value *Addr;
4110   if (OASE) {
4111     const Expr *Base = OASE->getBase();
4112     Addr = CGF.EmitScalarExpr(Base);
4113   } else {
4114     Addr = CGF.EmitLValue(E).getPointer(CGF);
4115   }
4116   llvm::Value *SizeVal;
4117   QualType Ty = E->getType();
4118   if (OASE) {
4119     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4120     for (const Expr *SE : OASE->getDimensions()) {
4121       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4122       Sz = CGF.EmitScalarConversion(
4123           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4124       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4125     }
4126   } else if (const auto *ASE =
4127                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4128     LValue UpAddrLVal =
4129         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4130     llvm::Value *UpAddr =
4131         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4132     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4133     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4134     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4135   } else {
4136     SizeVal = CGF.getTypeSize(Ty);
4137   }
4138   return std::make_pair(Addr, SizeVal);
4139 }
4140 
/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds the
/// flags type for it.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  // Flags are represented as a 32-bit unsigned integer.
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    // struct kmp_task_affinity_info_t {
    //   intptr_t base_addr;
    //   size_t len;
    //   <flags type> flags;
    // };
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
4155 
/// Emits the code needed to create and initialize a task for the
/// task-generating directive \p D: builds the kmp_task_t record variant for
/// the directive, allocates the task via the runtime, copies shareds,
/// initializes private copies, processes detach and affinity clauses, and
/// wires up the destructor thunk and priority when required.
/// \param TaskFunction Outlined function that executes the task body.
/// \param SharedsTy Type of the record capturing the shared variables.
/// \param Shareds Address of the captured shareds on the caller's side.
/// \return Handles to the new task, its entry point and helper functions.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally record the element-init expression used to
  // copy the original value into the private copy.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Sort descending by alignment (stable, so source order is kept among
  // equally-aligned privates) for the layout of the privates record.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // larger record variant, cached separately from the plain task variant.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Build the function that maps a task's privates record to pointers to the
  // individual private copies, or a null pointer if there are no privates.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    // The runtime must run the destructor thunk if any private copy has a
    // non-trivial destructor.
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final flag may be a runtime value (final clause expression) or a
  // compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // nowait: use the target task allocation entry point, which also takes a
    // device ID.
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    // NOTE: this local `Loc` (an llvm::Value *) shadows the SourceLocation
    // parameter of the same name.
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Iterator-modified clauses contribute a runtime count (product of the
    // iterator upper bounds); plain clauses contribute a compile-time count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: emit a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time-sized case: a plain constant array temp.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Iterator-filled entries need a runtime position counter, continuing
      // after the statically-filled prefix.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // Generates the loop nest for the iterator; stores below land inside it.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // ++Pos (runtime counter).
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloop tasks may be duplicated by the runtime; provide a dup function
    // when lastprivates or re-initialization require it.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4538 
namespace {
/// Dependence kind for RTL.
/// NOTE(review): these values presumably mirror the OpenMP runtime's
/// kmp_depend_info flag encoding — confirm against openmp/runtime/src/kmp.h
/// before changing.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4549 
4550 /// Translates internal dependency kind into the runtime kind.
4551 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4552   RTLDependenceKindTy DepKind;
4553   switch (K) {
4554   case OMPC_DEPEND_in:
4555     DepKind = DepIn;
4556     break;
4557   // Out and InOut dependencies must use the same code.
4558   case OMPC_DEPEND_out:
4559   case OMPC_DEPEND_inout:
4560     DepKind = DepInOut;
4561     break;
4562   case OMPC_DEPEND_mutexinoutset:
4563     DepKind = DepMutexInOutSet;
4564     break;
4565   case OMPC_DEPEND_source:
4566   case OMPC_DEPEND_sink:
4567   case OMPC_DEPEND_depobj:
4568   case OMPC_DEPEND_unknown:
4569     llvm_unreachable("Unknown task dependence type");
4570   }
4571   return DepKind;
4572 }
4573 
4574 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4575 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4576                            QualType &FlagsTy) {
4577   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4578   if (KmpDependInfoTy.isNull()) {
4579     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4580     KmpDependInfoRD->startDefinition();
4581     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4582     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4583     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4584     KmpDependInfoRD->completeDefinition();
4585     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4586   }
4587 }
4588 
/// Returns the number of dependency elements stored in a depobj together with
/// an lvalue for the start of its kmp_depend_info array.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable holds a void* pointing at the dependency array; load
  // it and reinterpret as kmp_depend_info*.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // The element *preceding* the array (index -1) carries the element count in
  // its base_addr field; GEP back one record to reach it.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4617 
/// Fills \p DependenciesArray with kmp_depend_info records for the dependency
/// items in \p Data, starting at the position given by \p Pos.
/// \param Pos Either a compile-time index (unsigned*) advanced in place, or a
/// runtime counter lvalue (LValue*) used when the clause has an iterator
/// modifier and the element count is only known at run time.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Wrap the stores in the iterator loop nest, if the clause has an iterator
  // modifier; a null iterator makes this a no-op scope.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Compile-time position: constant GEP into the array.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime position: load the counter and index dynamically.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: bump either the static index or the runtime
    // counter to match the indexing mode used above.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4676 
/// Computes, for every depobj expression in \p Data, the number of
/// kmp_depend_info records stored in that depobj object, and returns the
/// loaded size values (one per dependency expression).
/// The count is read from the bookkeeping record located immediately before
/// the depobj's record array (emitDepobjDependClause stores the element
/// count into element 0 and hands out a pointer to element 1).
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // If the dependencies are produced by an iterator, this scope emits the
    // iterator loop; the per-expression counts are accumulated into stack
    // temporaries so the totals remain available after the scope closes.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // Load the opaque pointer stored in the depobj variable and
      // reinterpret it as a pointer to kmp_depend_info records.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // Step back one record: index -1 holds the bookkeeping data.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate the count in a zero-initialized stack temporary so the
      // value is still readable once the iterator loop has finished.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Read the accumulated sizes back outside of the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4734 
/// Copies the already-materialized kmp_depend_info records from each depobj
/// listed in \p Data into \p DependenciesArray. The insertion position is
/// read from \p PosLVal before each copy and advanced afterwards by the
/// number of records copied.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // Size of a single kmp_depend_info record; scales the memcpy below.
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Emit the iterator loop, if the dependencies are iterator-driven.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the pointer stored in the depobj variable and reinterpret it as
      // a pointer to the kmp_depend_info record array.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      // The count lives in the bookkeeping record at index -1 (written by
      // emitDepobjDependClause).
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4795 
4796 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4797     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4798     SourceLocation Loc) {
4799   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4800         return D.DepExprs.empty();
4801       }))
4802     return std::make_pair(nullptr, Address::invalid());
4803   // Process list of dependencies.
4804   ASTContext &C = CGM.getContext();
4805   Address DependenciesArray = Address::invalid();
4806   llvm::Value *NumOfElements = nullptr;
4807   unsigned NumDependencies = std::accumulate(
4808       Dependencies.begin(), Dependencies.end(), 0,
4809       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4810         return D.DepKind == OMPC_DEPEND_depobj
4811                    ? V
4812                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4813       });
4814   QualType FlagsTy;
4815   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4816   bool HasDepobjDeps = false;
4817   bool HasRegularWithIterators = false;
4818   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4819   llvm::Value *NumOfRegularWithIterators =
4820       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4821   // Calculate number of depobj dependecies and regular deps with the iterators.
4822   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4823     if (D.DepKind == OMPC_DEPEND_depobj) {
4824       SmallVector<llvm::Value *, 4> Sizes =
4825           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4826       for (llvm::Value *Size : Sizes) {
4827         NumOfDepobjElements =
4828             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4829       }
4830       HasDepobjDeps = true;
4831       continue;
4832     }
4833     // Include number of iterations, if any.
4834     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4835       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4836         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4837         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4838         NumOfRegularWithIterators =
4839             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4840       }
4841       HasRegularWithIterators = true;
4842       continue;
4843     }
4844   }
4845 
4846   QualType KmpDependInfoArrayTy;
4847   if (HasDepobjDeps || HasRegularWithIterators) {
4848     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4849                                            /*isSigned=*/false);
4850     if (HasDepobjDeps) {
4851       NumOfElements =
4852           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4853     }
4854     if (HasRegularWithIterators) {
4855       NumOfElements =
4856           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4857     }
4858     OpaqueValueExpr OVE(Loc,
4859                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4860                         VK_RValue);
4861     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4862                                                   RValue::get(NumOfElements));
4863     KmpDependInfoArrayTy =
4864         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4865                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4866     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4867     // Properly emit variable-sized array.
4868     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4869                                          ImplicitParamDecl::Other);
4870     CGF.EmitVarDecl(*PD);
4871     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4872     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4873                                               /*isSigned=*/false);
4874   } else {
4875     KmpDependInfoArrayTy = C.getConstantArrayType(
4876         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4877         ArrayType::Normal, /*IndexTypeQuals=*/0);
4878     DependenciesArray =
4879         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4880     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4881     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4882                                            /*isSigned=*/false);
4883   }
4884   unsigned Pos = 0;
4885   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4886     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4887         Dependencies[I].IteratorExpr)
4888       continue;
4889     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4890                    DependenciesArray);
4891   }
4892   // Copy regular dependecies with iterators.
4893   LValue PosLVal = CGF.MakeAddrLValue(
4894       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4895   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4896   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4897     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4898         !Dependencies[I].IteratorExpr)
4899       continue;
4900     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4901                    DependenciesArray);
4902   }
4903   // Copy final depobj arrays without iterators.
4904   if (HasDepobjDeps) {
4905     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4906       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4907         continue;
4908       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4909                          DependenciesArray);
4910     }
4911   }
4912   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4913       DependenciesArray, CGF.VoidPtrTy);
4914   return std::make_pair(NumOfElements, DependenciesArray);
4915 }
4916 
/// Allocates (via __kmpc_alloc) and fills the kmp_depend_info array backing
/// an OpenMP 'depobj' object.
/// Layout of the allocation: element 0 is a bookkeeping record whose
/// base_addr field holds the number of depend records; the records
/// themselves start at element 1. The returned Address points at element 1,
/// cast to void*.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Runtime-sized case: record count is the product of the iterator upper
    // bounds.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the bookkeeping element, then scale by the aligned record size
    // to get the allocation size in bytes.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Statically-sized case: byte size computed from the constant array type.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Records start at index 1. With an iterator modifier the position must be
  // tracked in memory, because it is updated inside the generated loop.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of the first real record (element 1) as void*.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
4999 
5000 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5001                                         SourceLocation Loc) {
5002   ASTContext &C = CGM.getContext();
5003   QualType FlagsTy;
5004   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5005   LValue Base = CGF.EmitLoadOfPointerLValue(
5006       DepobjLVal.getAddress(CGF),
5007       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5008   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5009   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5010       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5011   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5012       Addr.getPointer(),
5013       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5014   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5015                                                                CGF.VoidPtrTy);
5016   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5017   // Use default allocator.
5018   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5019   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5020 
5021   // _kmpc_free(gtid, addr, nullptr);
5022   (void)CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
5023                                 CGM.getModule(), OMPRTL___kmpc_free),
5024                             Args);
5025 }
5026 
/// Emits code for 'depobj(x) update(NewDepKind)': rewrites the flags field
/// of every kmp_depend_info record stored in the depobj to the new
/// dependency kind, leaving base_addr and len untouched.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  // Fetch the record count and an lvalue for the first record.
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  // NOTE(review): the body executes at least once before the exit check;
  // this relies on a depobj always holding at least one record — confirm.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI carries the current element pointer across loop iterations.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5072 
/// Emits code for a 'task' construct: creates the task object via
/// emitTaskInit, then either enqueues it through the runtime
/// (__kmpc_omp_task[_with_deps]) or — when the 'if' clause evaluates to
/// false — executes it immediately in the encountering thread, bracketed by
/// __kmpc_omp_task_begin_if0 / __kmpc_omp_task_complete_if0.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Allocate and initialize the kmp_task_t object (shareds, privates, etc.).
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'Then' branch: defer the task by handing it to the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // For untied tasks, initialize part_id to 0 before the first run.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // 'Else' branch: 'if' clause is false — the task is undeferred and runs
  // synchronously in the encountering thread.
  auto &&ElseCodeGen = [&M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // CommonActionTy brackets the proxy call with begin_if0/complete_if0.
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    // Emit: if (IfCond) { enqueue } else { run inline }.
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5190 
/// Emits code for a 'taskloop' construct: creates the task object, fills the
/// task's lower-bound/upper-bound/stride fields from the loop variables'
/// initializers, stores the reduction data pointer, and calls
/// __kmpc_taskloop.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  // Unlike 'task', the 'if' condition is passed to the runtime as an integer
  // argument rather than branching in the generated code.
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the task's lower bound field from the LB variable initializer.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Upper bound field.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Stride field.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Encoding of the 'sched' argument passed to __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5276 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded unchanged to
/// \p RedOpGen on each element.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Guard against an empty array: skip the body entirely if begin == end.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs carry the current source/destination element pointers across
  // iterations.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current elements so RedOpGen
  // operates element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5356 
5357 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5358 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5359 /// UDR combiner function.
5360 static void emitReductionCombiner(CodeGenFunction &CGF,
5361                                   const Expr *ReductionOp) {
5362   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5363     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5364       if (const auto *DRE =
5365               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5366         if (const auto *DRD =
5367                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5368           std::pair<llvm::Function *, llvm::Function *> Reduction =
5369               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5370           RValue Func = RValue::get(Reduction.first);
5371           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5372           CGF.EmitIgnoredExpr(ReductionOp);
5373           return;
5374         }
5375   CGF.EmitIgnoredExpr(ReductionOp);
5376 }
5377 
/// Emits the outlined function
///   void reduction_func(void *LHSArg, void *RHSArg)
/// where each argument points to an array of void* pointers to the individual
/// reduction items; each RHS item is combined into the corresponding LHS item
/// via the matching expression in \a ReductionOps.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap the LHS/RHS variables referenced by the reduction expressions to
  // the addresses stored in the argument arrays.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // VLA items occupy an extra slot in the array holding the element
      // count; decode it and bind it to the VLA's size expression.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5469 
5470 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5471                                                   const Expr *ReductionOp,
5472                                                   const Expr *PrivateRef,
5473                                                   const DeclRefExpr *LHS,
5474                                                   const DeclRefExpr *RHS) {
5475   if (PrivateRef->getType()->isArrayType()) {
5476     // Emit reduction for array section.
5477     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5478     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5479     EmitOMPAggregateReduction(
5480         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5481         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5482           emitReductionCombiner(CGF, ReductionOp);
5483         });
5484   } else {
5485     // Emit reduction for array subscript or single variable.
5486     emitReductionCombiner(CGF, ReductionOp);
5487   }
5488 }
5489 
/// Emits the full reduction epilogue: collects the private items into a
/// void* array, calls __kmpc_reduce{_nowait} and switches on its result to
/// combine the privates into the shared items either directly (case 1) or
/// atomically (case 2). With Options.SimpleReduction only the plain combiner
/// code is emitted.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  // Nothing to do if the builder has no valid insertion point.
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Serial context: combine privates into shared items without any runtime
    // library calls.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The VLA element count goes into the extra slot, encoded as a void*.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Reduction ops of the form 'x = <update>' are candidates for a simple
      // atomic update; decompose them here.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback path: stash the loaded value of x into a temp and
                // remap VD to it so the update expression can be re-emitted.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5796 
5797 /// Generates unique name for artificial threadprivate variables.
5798 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5799 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5800                                       const Expr *Ref) {
5801   SmallString<256> Buffer;
5802   llvm::raw_svector_ostream Out(Buffer);
5803   const clang::DeclRefExpr *DE;
5804   const VarDecl *D = ::getBaseDecl(Ref, DE);
5805   if (!D)
5806     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5807   D = D->getCanonicalDecl();
5808   std::string Name = CGM.getOpenMPRuntime().getName(
5809       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5810   Out << Prefix << Name << "_"
5811       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5812   return std::string(Out.str());
5813 }
5814 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are marked restrict - the private copy and the original
  // item never alias.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // The original item is not needed - pass a null pointer lvalue instead.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5883 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // The variables referenced by the combiner expression; they are remapped
  // onto the function arguments below.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5961 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr when the reduction item needs no cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is required for items without cleanups (e.g. trivially
  // destructible data).
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6010 
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to emit without an insertion point or without reduction items.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one array element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    // ElemLVal.reduce_size = size-in-chars;
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini; null when no cleanup is required.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0; or 1 when delayed creation was requested above.
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6139 
6140 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6141                                             SourceLocation Loc,
6142                                             bool IsWorksharingReduction) {
6143   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6144   // is_ws, int num, void *data);
6145   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6146   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6147                                                 CGM.IntTy, /*isSigned=*/true);
6148   llvm::Value *Args[] = {IdentTLoc, GTid,
6149                          llvm::ConstantInt::get(CGM.IntTy,
6150                                                 IsWorksharingReduction ? 1 : 0,
6151                                                 /*isSigned=*/true)};
6152   (void)CGF.EmitRuntimeCall(
6153       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
6154           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6155       Args);
6156 }
6157 
6158 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6159                                               SourceLocation Loc,
6160                                               ReductionCodeGen &RCG,
6161                                               unsigned N) {
6162   auto Sizes = RCG.getSizes(N);
6163   // Emit threadprivate global variable if the type is non-constant
6164   // (Sizes.second = nullptr).
6165   if (Sizes.second) {
6166     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6167                                                      /*isSigned=*/false);
6168     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6169         CGF, CGM.getContext().getSizeType(),
6170         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6171     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6172   }
6173 }
6174 
6175 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6176                                               SourceLocation Loc,
6177                                               llvm::Value *ReductionsPtr,
6178                                               LValue SharedLVal) {
6179   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6180   // *d);
6181   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6182                                                    CGM.IntTy,
6183                                                    /*isSigned=*/true),
6184                          ReductionsPtr,
6185                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6186                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6187   return Address(
6188       CGF.EmitRuntimeCall(
6189           llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
6190               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6191           Args),
6192       SharedLVal.getAlignment());
6193 }
6194 
6195 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6196                                        SourceLocation Loc) {
6197   if (!CGF.HaveInsertPoint())
6198     return;
6199 
6200   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
6201   if (OMPBuilder) {
6202     OMPBuilder->CreateTaskwait(CGF.Builder);
6203   } else {
6204     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6205     // global_tid);
6206     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6207     // Ignore return result until untied tasks are supported.
6208     CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
6209                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6210                         Args);
6211   }
6212 
6213   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6214     Region->emitUntiedSwitch(CGF);
6215 }
6216 
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // Temporarily install captured-statement info for the inlined directive
  // (restored when Region goes out of scope) and emit its body directly into
  // the current function instead of outlining it.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
6226 
namespace {
/// Cancellation kind constants passed as the 'cncl_kind' argument to the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime calls (see
/// emitCancelCall and emitCancellationPointCall below).
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
6236 
6237 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6238   RTCancelKind CancelKind = CancelNoreq;
6239   if (CancelRegion == OMPD_parallel)
6240     CancelKind = CancelParallel;
6241   else if (CancelRegion == OMPD_for)
6242     CancelKind = CancelLoop;
6243   else if (CancelRegion == OMPD_sections)
6244     CancelKind = CancelSections;
6245   else {
6246     assert(CancelRegion == OMPD_taskgroup);
6247     CancelKind = CancelTaskgroup;
6248   }
6249   return CancelKind;
6250 }
6251 
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // Emit the check of the runtime's result:
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct: branch through any pending cleanups to the
      // cancellation destination of the enclosing region.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6288 
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // ThenGen emits the actual cancel; it is run either unconditionally or
    // under the 'if' clause condition (see below). Captures by value/ref are
    // safe: the lambda is invoked before this function returns.
    auto &&ThenGen = [&M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result =
          CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                                  M, OMPRTL___kmpc_cancel),
                              Args);
      // Emit the check of the runtime's result:
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct: branch through any pending cleanups to the
      // cancellation destination of the enclosing region.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // With an 'if' clause, emit the cancel only on the true branch; the
      // else-generator is a no-op.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6333 
6334 namespace {
6335 /// Cleanup action for uses_allocators support.
6336 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6337   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6338 
6339 public:
6340   OMPUsesAllocatorsActionTy(
6341       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6342       : Allocators(Allocators) {}
6343   void Enter(CodeGenFunction &CGF) override {
6344     if (!CGF.HaveInsertPoint())
6345       return;
6346     for (const auto &AllocatorData : Allocators) {
6347       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6348           CGF, AllocatorData.first, AllocatorData.second);
6349     }
6350   }
6351   void Exit(CodeGenFunction &CGF) override {
6352     if (!CGF.HaveInsertPoint())
6353       return;
6354     for (const auto &AllocatorData : Allocators) {
6355       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6356                                                         AllocatorData.first);
6357     }
6358   }
6359 };
6360 } // namespace
6361 
6362 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6363     const OMPExecutableDirective &D, StringRef ParentName,
6364     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6365     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6366   assert(!ParentName.empty() && "Invalid target region parent name!");
6367   HasEmittedTargetRegion = true;
6368   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6369   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6370     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6371       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6372       if (!D.AllocatorTraits)
6373         continue;
6374       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6375     }
6376   }
6377   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6378   CodeGen.setAction(UsesAllocatorAction);
6379   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6380                                    IsOffloadEntry, CodeGen);
6381 }
6382 
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits == constant array bound of the traits variable's type.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits lvalue's address as void** and load a void* from
  // it; the resulting value is passed to the runtime as the traits argument.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  // Call the runtime to create the allocator handle.
  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator. Emit the allocator variable declaration first so it
  // has storage to receive the handle.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6417 
6418 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6419                                              const Expr *Allocator) {
6420   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6421   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6422   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6423   llvm::Value *AllocatorVal =
6424       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6425   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6426                                           CGF.getContext().VoidPtrTy,
6427                                           Allocator->getExprLoc());
6428   (void)CGF.EmitRuntimeCall(
6429       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
6430           CGM.getModule(), OMPRTL___kmpc_destroy_allocator),
6431       {ThreadId, AllocatorVal});
6432 }
6433 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured statement of the target region under EntryFnName.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting
  // code for the device, because these functions will be entry points to the
  // device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // On the host, a unique constant byte global stands in as the region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6500 
6501 /// Checks if the expression is constant or does not have non-trivial function
6502 /// calls.
6503 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6504   // We can skip constant expressions.
6505   // We can skip expressions with trivial calls or simple expressions.
6506   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6507           !E->hasNonTrivialCall(Ctx)) &&
6508          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6509 }
6510 
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  // Drill through nested compound statements looking for a single
  // "significant" child statement; returns nullptr if more than one is found.
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        // Trivial (constant / side-effect-free) expressions are ignored.
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              // Declarations with no runtime effect can be skipped outright.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Variables are ignorable if constexpr, or of trivial/reference
              // type with no initializer or a trivial one.
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Unwrap containers around the single child and keep drilling down.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6555 
6556 /// Emit the number of teams for a target directive.  Inspect the num_teams
6557 /// clause associated with a teams construct combined or closely nested
6558 /// with the target directive.
6559 ///
6560 /// Emit a team of size one for directives such as 'target parallel' that
6561 /// have no associated teams construct.
6562 ///
6563 /// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': inspect the single nested directive (if any) to decide
    // the number of teams.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        // Nested 'teams' with num_teams: evaluate the clause expression.
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // 'teams' without num_teams: 0 means implementation-chosen count.
        return Bld.getInt32(0);
      }
      // Nested parallel/simd: a single team is used.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested directive: the number of teams is not known here.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams: evaluate num_teams on the directive itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // target parallel/simd without teams: exactly one team.
    return Bld.getInt32(1);
  // The remaining directive kinds are not target-based and must not reach
  // this function (guarded by the assertion above).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6686 
/// Compute the number of threads implied by a parallel (or simd) region that
/// is the single child of the captured statement \p CS of a target construct.
///
/// For a nested parallel directive the result encodes the interplay of its
/// 'if' and 'num_threads' clauses:
///   <cond> ? (<numthreads> ? <numthreads> : 0) : 1
/// clamped by \p DefaultThreadLimitVal when one is provided. A nested simd
/// directive yields 1. Otherwise \p DefaultThreadLimitVal (or 0, which lets
/// the runtime pick a default) is returned.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Only an 'if' clause without a name modifier, or with the 'parallel'
        // modifier, applies to the nested parallel region.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Constant-false condition: the parallel region is serialized, so
            // exactly one thread is required.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any pre-init declarations the condition depends on.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // Allocate the variable without emitting its initializer.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit any pre-init declarations the num_threads expression needs.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Allocate the variable without emitting its initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads against the default limit:
        // min(DefaultThreadLimitVal, NumThreads) via unsigned compare.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A nested simd region executes with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  // No nested directive found: use the default limit, or 0 to let the
  // runtime choose.
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6778 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// The returned value is an i32; 0 means "no explicit limit, let the runtime
/// decide". Must only be called on the host side (see the assertion below).
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': inspect the nested region to determine the number of
    // threads.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested construct may carry a thread_limit clause; emit it first so
      // it can cap any nested num_threads value below.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit any pre-init declarations the thread_limit expression needs.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Allocate the variable without emitting its initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a nested teams (non-distribute) construct, descend one level to
      // find the directive that actually determines the thread count.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A nested simd region executes with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Look through a nested 'distribute' for a parallel region that
      // determines the thread count.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Only an 'if' clause without a name modifier, or with the 'parallel'
      // modifier, applies to the parallel region.
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant-false condition: the parallel region is serialized, so
          // exactly one thread is required.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // When both clauses are present, the effective value is
      // min(num_threads, thread_limit) computed with an unsigned compare.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd regions execute with a single thread.
    return Bld.getInt32(1);
  // The remaining directive kinds are not target-based execution directives
  // and are rejected by the assertion above.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7001 
7002 namespace {
7003 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7004 
7005 // Utility to handle information from clauses associated with a given
7006 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7007 // It provides a convenient interface to obtain the information and generate
7008 // code for that information.
7009 class MappableExprsHandler {
7010 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class. The bit offset of this field is obtained via
    /// getFlagMemberOffset().
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7051 
7052   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7053   static unsigned getFlagMemberOffset() {
7054     unsigned Offset = 0;
7055     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7056          Remain = Remain >> 1)
7057       Offset++;
7058     return Offset;
7059   }
7060 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Retrieve the wrapped base pointer value.
    llvm::Value *operator*() const { return Ptr; }
    /// Return the device pointer declaration, or null if there is none.
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    /// Associate a device pointer declaration with this base pointer.
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
7077 
  /// Base pointers (with optional associated declaration) for the map entries.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// Section pointers / sizes for the map entries.
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// Per-entry mapping flags.
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;

  /// Map between a struct and the its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Lowest mapped field: its index within the struct and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Highest mapped field: its index within the struct and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Address of the whole struct.
    Address Base = Address::invalid();
  };
7093 
private:
  /// Mapping information for one mappable-expression component list: the
  /// components themselves plus the map type/modifiers they were mapped with,
  /// whether a device pointer has to be returned for the entry, and whether
  /// the map was implicit or came from a use_device_addr clause.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer,
        bool IsImplicit, bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          ForDeviceAddr(ForDeviceAddr) {}
  };
7114 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // Expression through which the member is referenced.
    const Expr *IE = nullptr;
    // Declaration named by the use_device_ptr/use_device_addr clause.
    const ValueDecl *VD = nullptr;
    // True if the entry came from use_device_addr (rather than
    // use_device_ptr).
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7127 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either a executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7148 
  /// Compute the size in bytes (as an llvm::Value) of the storage designated
  /// by expression \p E for mapping purposes. Array shaping expressions and
  /// OpenMP array sections get a computed size; everything else uses the size
  /// of the expression's (canonical, non-reference) type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Size = element size * product of all shaping dimensions.
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        // Dimensions may have arbitrary integer types; widen to size_t.
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
        return ElemSize;

      // Explicit length: Size = length * element size.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Guard against lb beyond the base size: clamp the result to 0 instead
      // of producing a wrapped-around value.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7223 
7224   /// Return the corresponding bits for a given map clause modifier. Add
7225   /// a flag marking the map as a pointer if requested. Add a flag marking the
7226   /// map as the first one of a series of maps that relate to the same map
7227   /// expression.
7228   OpenMPOffloadMappingFlags getMapTypeBits(
7229       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7230       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7231     OpenMPOffloadMappingFlags Bits =
7232         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7233     switch (MapType) {
7234     case OMPC_MAP_alloc:
7235     case OMPC_MAP_release:
7236       // alloc and release is the default behavior in the runtime library,  i.e.
7237       // if we don't pass any bits alloc/release that is what the runtime is
7238       // going to do. Therefore, we don't need to signal anything for these two
7239       // type modifiers.
7240       break;
7241     case OMPC_MAP_to:
7242       Bits |= OMP_MAP_TO;
7243       break;
7244     case OMPC_MAP_from:
7245       Bits |= OMP_MAP_FROM;
7246       break;
7247     case OMPC_MAP_tofrom:
7248       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7249       break;
7250     case OMPC_MAP_delete:
7251       Bits |= OMP_MAP_DELETE;
7252       break;
7253     case OMPC_MAP_unknown:
7254       llvm_unreachable("Unexpected map type!");
7255     }
7256     if (AddPtrFlag)
7257       Bits |= OMP_MAP_PTR_AND_OBJ;
7258     if (AddIsTargetParamFlag)
7259       Bits |= OMP_MAP_TARGET_PARAM;
7260     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7261         != MapModifiers.end())
7262       Bits |= OMP_MAP_ALWAYS;
7263     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7264         != MapModifiers.end())
7265       Bits |= OMP_MAP_CLOSE;
7266     return Bits;
7267   }
7268 
7269   /// Return true if the provided expression is a final array section. A
7270   /// final array section, is one whose length can't be proved to be one.
7271   bool isFinalArraySectionExpression(const Expr *E) const {
7272     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7273 
7274     // It is not an array section and therefore not a unity-size one.
7275     if (!OASE)
7276       return false;
7277 
7278     // An array section with no colon always refer to a single element.
7279     if (OASE->getColonLoc().isInvalid())
7280       return false;
7281 
7282     const Expr *Length = OASE->getLength();
7283 
7284     // If we don't have a length we have to check if the array has size 1
7285     // for this dimension. Also, we should always expect a length if the
7286     // base type is pointer.
7287     if (!Length) {
7288       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7289                              OASE->getBase()->IgnoreParenImpCasts())
7290                              .getCanonicalType();
7291       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7292         return ATy->getSize().getSExtValue() != 1;
7293       // If we don't have a constant dimension length, we have to consider
7294       // the current section as having any size, so it is not necessarily
7295       // unitary. If it happen to be unity size, that's user fault.
7296       return true;
7297     }
7298 
7299     // Check if the length evaluates to 1.
7300     Expr::EvalResult Result;
7301     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7302       return true; // Can have more that size 1.
7303 
7304     llvm::APSInt ConstLength = Result.Val.getInt();
7305     return ConstLength.getSExtValue() != 1;
7306   }
7307 
  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
  ///
  /// The generated entries are appended to \p BasePointers, \p Pointers,
  /// \p Sizes and \p Types (one element of each per entry). \p PartialStruct
  /// is updated with the lowest/highest mapped member when the list maps
  /// struct members, so the caller can later emit a combined entry.
  /// \p OverlappedElements, when non-empty, lists component lists that
  /// overlap this one; the non-overlapped parts are then emitted as separate
  /// bitcopy entries.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit, bool ForDeviceAddr = false,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    // Components are stored complete-expression-first, so the reverse
    // iterator I walks from the base expression outwards.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      // Array shaping of a member of 'this': evaluate the shaped base pointer
      // directly; it is already an address.
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      // A pointer expressed through a unary (*) or binary operator is being
      // dereferenced, so it does not start a new entry by itself.
      bool IsNonDerefPointer = IsPointer && !UO && !BO;

      if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        // LB is the lower bound (section pointer) of the entry being emitted.
        Address LB = Address::invalid();
        if (OAShE) {
          LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                       CGF.getContext().getTypeAlignInChars(
                           OAShE->getBase()->getType()));
        } else {
          LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                   .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            (IsPointer || ForDeviceAddr) && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          // HB points at the last byte of the mapped type.
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements: emit one
          // entry per gap between consecutive overlapped elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress(CGF);
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
                                                      /*isSigned=*/true));
            Types.push_back(Flags);
            // The next gap starts right after this overlapped element.
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          // Emit the trailing gap: from the last overlapped element to the
          // end of the mapped type.
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          Types.push_back(Flags);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointerOrAddr) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference,
              IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LB};
            }
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }
7771 
7772   /// Return the adjusted map modifiers if the declaration a capture refers to
7773   /// appears in a first-private clause. This is expected to be used only with
7774   /// directives that start with 'target'.
7775   MappableExprsHandler::OpenMPOffloadMappingFlags
7776   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7777     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7778 
7779     // A first private variable captured by reference will use only the
7780     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7781     // declaration is known as first-private in this handler.
7782     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7783       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7784           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7785         return MappableExprsHandler::OMP_MAP_ALWAYS |
7786                MappableExprsHandler::OMP_MAP_TO;
7787       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7788         return MappableExprsHandler::OMP_MAP_TO |
7789                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7790       return MappableExprsHandler::OMP_MAP_PRIVATE |
7791              MappableExprsHandler::OMP_MAP_TO;
7792     }
7793     return MappableExprsHandler::OMP_MAP_TO |
7794            MappableExprsHandler::OMP_MAP_FROM;
7795   }
7796 
  /// Build the MEMBER_OF flag for the 1-based position \p Position + 1 of the
  /// parent struct entry in the offloading argument list. The position is
  /// shifted left by getFlagMemberOffset() bits into the MEMBER_OF field of
  /// the flags (the previous "rotate" wording was misleading; this is a plain
  /// left shift).
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // MEMBER_OF is 1-based, hence Position + 1.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }
7802 
7803   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7804                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7805     // If the entry is PTR_AND_OBJ but has not been marked with the special
7806     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7807     // marked as MEMBER_OF.
7808     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7809         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7810       return;
7811 
7812     // Reset the placeholder value to prepare the flag for the assignment of the
7813     // proper MEMBER_OF value.
7814     Flags &= ~OMP_MAP_MEMBER_OF;
7815     Flags |= MemberOfFlag;
7816   }
7817 
7818   void getPlainLayout(const CXXRecordDecl *RD,
7819                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7820                       bool AsBase) const {
7821     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7822 
7823     llvm::StructType *St =
7824         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7825 
7826     unsigned NumElements = St->getNumElements();
7827     llvm::SmallVector<
7828         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7829         RecordLayout(NumElements);
7830 
7831     // Fill bases.
7832     for (const auto &I : RD->bases()) {
7833       if (I.isVirtual())
7834         continue;
7835       const auto *Base = I.getType()->getAsCXXRecordDecl();
7836       // Ignore empty bases.
7837       if (Base->isEmpty() || CGF.getContext()
7838                                  .getASTRecordLayout(Base)
7839                                  .getNonVirtualSize()
7840                                  .isZero())
7841         continue;
7842 
7843       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7844       RecordLayout[FieldIndex] = Base;
7845     }
7846     // Fill in virtual bases.
7847     for (const auto &I : RD->vbases()) {
7848       const auto *Base = I.getType()->getAsCXXRecordDecl();
7849       // Ignore empty bases.
7850       if (Base->isEmpty())
7851         continue;
7852       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7853       if (RecordLayout[FieldIndex])
7854         continue;
7855       RecordLayout[FieldIndex] = Base;
7856     }
7857     // Fill in all the fields.
7858     assert(!RD->isUnion() && "Unexpected union.");
7859     for (const auto *Field : RD->fields()) {
7860       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7861       // will fill in later.)
7862       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7863         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7864         RecordLayout[FieldIndex] = Field;
7865       }
7866     }
7867     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7868              &Data : RecordLayout) {
7869       if (Data.isNull())
7870         continue;
7871       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7872         getPlainLayout(Base, Layout, /*AsBase=*/true);
7873       else
7874         Layout.push_back(Data.get<const FieldDecl *>());
7875     }
7876   }
7877 
7878 public:
7879   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7880       : CurDir(&Dir), CGF(CGF) {
7881     // Extract firstprivate clause information.
7882     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7883       for (const auto *D : C->varlists())
7884         FirstPrivateDecls.try_emplace(
7885             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7886     // Extract implicit firstprivates from uses_allocators clauses.
7887     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
7888       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
7889         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
7890         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
7891           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
7892                                         /*Implicit=*/true);
7893         else if (const auto *VD = dyn_cast<VarDecl>(
7894                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
7895                          ->getDecl()))
7896           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
7897       }
7898     }
7899     // Extract device pointer clause information.
7900     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7901       for (auto L : C->component_lists())
7902         DevPointersMap[L.first].push_back(L.second);
7903   }
7904 
  /// Constructor for the declare mapper directive. Only records the directive
  /// and the CodeGen function; no clause information is pre-extracted here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
7908 
7909   /// Generate code for the combined entry if we have a partially mapped struct
7910   /// and take care of the mapping flags of the arguments corresponding to
7911   /// individual struct members.
7912   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7913                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7914                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7915                          const StructRangeInfoTy &PartialStruct) const {
7916     // Base is the base of the struct
7917     BasePointers.push_back(PartialStruct.Base.getPointer());
7918     // Pointer is the address of the lowest element
7919     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7920     Pointers.push_back(LB);
7921     // Size is (addr of {highest+1} element) - (addr of lowest element)
7922     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7923     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7924     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7925     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7926     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7927     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7928                                                   /*isSigned=*/false);
7929     Sizes.push_back(Size);
7930     // Map type is always TARGET_PARAM
7931     Types.push_back(OMP_MAP_TARGET_PARAM);
7932     // Remove TARGET_PARAM flag from the first element
7933     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7934 
7935     // All other current entries will be MEMBER_OF the combined entry
7936     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7937     // 0xFFFF in the MEMBER_OF field).
7938     OpenMPOffloadMappingFlags MemberOfFlag =
7939         getMemberOfFlag(BasePointers.size() - 1);
7940     for (auto &M : CurTypes)
7941       setCorrectMemberOfFlag(M, MemberOfFlag);
7942   }
7943 
7944   /// Generate all the base pointers, section pointers, sizes and map
7945   /// types for the extracted mappable expressions. Also, for each item that
7946   /// relates with a device pointer, a pair of the relevant declaration and
7947   /// index where it occurs is appended to the device pointers info array.
7948   void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7949                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7950                        MapFlagsArrayTy &Types) const {
7951     // We have to process the component lists that relate with the same
7952     // declaration in a single chunk so that we can generate the map flags
7953     // correctly. Therefore, we organize all lists in a map.
7954     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7955 
7956     // Helper function to fill the information map for the different supported
7957     // clauses.
7958     auto &&InfoGen =
7959         [&Info](const ValueDecl *D,
7960                 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7961                 OpenMPMapClauseKind MapType,
7962                 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7963                 bool ReturnDevicePointer, bool IsImplicit,
7964                 bool ForDeviceAddr = false) {
7965           const ValueDecl *VD =
7966               D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7967           Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7968                                 IsImplicit, ForDeviceAddr);
7969         };
7970 
7971     assert(CurDir.is<const OMPExecutableDirective *>() &&
7972            "Expect a executable directive");
7973     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
7974     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
7975       for (const auto L : C->component_lists()) {
7976         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7977             /*ReturnDevicePointer=*/false, C->isImplicit());
7978       }
7979     for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
7980       for (const auto L : C->component_lists()) {
7981         InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7982             /*ReturnDevicePointer=*/false, C->isImplicit());
7983       }
7984     for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
7985       for (const auto L : C->component_lists()) {
7986         InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7987             /*ReturnDevicePointer=*/false, C->isImplicit());
7988       }
7989 
7990     // Look at the use_device_ptr clause information and mark the existing map
7991     // entries as such. If there is no map information for an entry in the
7992     // use_device_ptr list, we create one with map type 'alloc' and zero size
7993     // section. It is the user fault if that was not mapped before. If there is
7994     // no map information and the pointer is a struct member, then we defer the
7995     // emission of that entry until the whole struct has been processed.
7996     llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7997         DeferredInfo;
7998 
7999     for (const auto *C :
8000          CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
8001       for (const auto L : C->component_lists()) {
8002         assert(!L.second.empty() && "Not expecting empty list of components!");
8003         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
8004         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8005         const Expr *IE = L.second.back().getAssociatedExpression();
8006         // If the first component is a member expression, we have to look into
8007         // 'this', which maps to null in the map of map information. Otherwise
8008         // look directly for the information.
8009         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8010 
8011         // We potentially have map information for this declaration already.
8012         // Look for the first set of components that refer to it.
8013         if (It != Info.end()) {
8014           auto CI = std::find_if(
8015               It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
8016                 return MI.Components.back().getAssociatedDeclaration() == VD;
8017               });
8018           // If we found a map entry, signal that the pointer has to be returned
8019           // and move on to the next declaration.
8020           if (CI != It->second.end()) {
8021             CI->ReturnDevicePointer = true;
8022             continue;
8023           }
8024         }
8025 
8026         // We didn't find any match in our map information - generate a zero
8027         // size array section - if the pointer is a struct member we defer this
8028         // action until the whole struct has been processed.
8029         if (isa<MemberExpr>(IE)) {
8030           // Insert the pointer into Info to be processed by
8031           // generateInfoForComponentList. Because it is a member pointer
8032           // without a pointee, no entry will be generated for it, therefore
8033           // we need to generate one after the whole struct has been processed.
8034           // Nonetheless, generateInfoForComponentList must be called to take
8035           // the pointer into account for the calculation of the range of the
8036           // partial struct.
8037           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8038                   /*ReturnDevicePointer=*/false, C->isImplicit());
8039           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8040         } else {
8041           llvm::Value *Ptr =
8042               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8043           BasePointers.emplace_back(Ptr, VD);
8044           Pointers.push_back(Ptr);
8045           Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8046           Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8047         }
8048       }
8049     }
8050 
8051     // Look at the use_device_addr clause information and mark the existing map
8052     // entries as such. If there is no map information for an entry in the
8053     // use_device_addr list, we create one with map type 'alloc' and zero size
8054     // section. It is the user fault if that was not mapped before. If there is
8055     // no map information and the pointer is a struct member, then we defer the
8056     // emission of that entry until the whole struct has been processed.
8057     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8058     for (const auto *C :
8059          CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
8060       for (const auto L : C->component_lists()) {
8061         assert(!L.second.empty() && "Not expecting empty list of components!");
8062         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
8063         if (!Processed.insert(VD).second)
8064           continue;
8065         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8066         const Expr *IE = L.second.back().getAssociatedExpression();
8067         // If the first component is a member expression, we have to look into
8068         // 'this', which maps to null in the map of map information. Otherwise
8069         // look directly for the information.
8070         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8071 
8072         // We potentially have map information for this declaration already.
8073         // Look for the first set of components that refer to it.
8074         if (It != Info.end()) {
8075           auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
8076             return MI.Components.back().getAssociatedDeclaration() == VD;
8077           });
8078           // If we found a map entry, signal that the pointer has to be returned
8079           // and move on to the next declaration.
8080           if (CI != It->second.end()) {
8081             CI->ReturnDevicePointer = true;
8082             continue;
8083           }
8084         }
8085 
8086         // We didn't find any match in our map information - generate a zero
8087         // size array section - if the pointer is a struct member we defer this
8088         // action until the whole struct has been processed.
8089         if (isa<MemberExpr>(IE)) {
8090           // Insert the pointer into Info to be processed by
8091           // generateInfoForComponentList. Because it is a member pointer
8092           // without a pointee, no entry will be generated for it, therefore
8093           // we need to generate one after the whole struct has been processed.
8094           // Nonetheless, generateInfoForComponentList must be called to take
8095           // the pointer into account for the calculation of the range of the
8096           // partial struct.
8097           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8098                   /*ReturnDevicePointer=*/false, C->isImplicit(),
8099                   /*ForDeviceAddr=*/true);
8100           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8101         } else {
8102           llvm::Value *Ptr;
8103           if (IE->isGLValue())
8104             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8105           else
8106             Ptr = CGF.EmitScalarExpr(IE);
8107           BasePointers.emplace_back(Ptr, VD);
8108           Pointers.push_back(Ptr);
8109           Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8110           Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8111         }
8112       }
8113     }
8114 
8115     for (const auto &M : Info) {
8116       // We need to know when we generate information for the first component
8117       // associated with a capture, because the mapping flags depend on it.
8118       bool IsFirstComponentList = true;
8119 
8120       // Temporary versions of arrays
8121       MapBaseValuesArrayTy CurBasePointers;
8122       MapValuesArrayTy CurPointers;
8123       MapValuesArrayTy CurSizes;
8124       MapFlagsArrayTy CurTypes;
8125       StructRangeInfoTy PartialStruct;
8126 
8127       for (const MapInfo &L : M.second) {
8128         assert(!L.Components.empty() &&
8129                "Not expecting declaration with no component lists.");
8130 
8131         // Remember the current base pointer index.
8132         unsigned CurrentBasePointersIdx = CurBasePointers.size();
8133         generateInfoForComponentList(
8134             L.MapType, L.MapModifiers, L.Components, CurBasePointers,
8135             CurPointers, CurSizes, CurTypes, PartialStruct,
8136             IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
8137 
8138         // If this entry relates with a device pointer, set the relevant
8139         // declaration and add the 'return pointer' flag.
8140         if (L.ReturnDevicePointer) {
8141           assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8142                  "Unexpected number of mapped base pointers.");
8143 
8144           const ValueDecl *RelevantVD =
8145               L.Components.back().getAssociatedDeclaration();
8146           assert(RelevantVD &&
8147                  "No relevant declaration related with device pointer??");
8148 
8149           CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8150           CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8151         }
8152         IsFirstComponentList = false;
8153       }
8154 
8155       // Append any pending zero-length pointers which are struct members and
8156       // used with use_device_ptr or use_device_addr.
8157       auto CI = DeferredInfo.find(M.first);
8158       if (CI != DeferredInfo.end()) {
8159         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8160           llvm::Value *BasePtr;
8161           llvm::Value *Ptr;
8162           if (L.ForDeviceAddr) {
8163             if (L.IE->isGLValue())
8164               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8165             else
8166               Ptr = this->CGF.EmitScalarExpr(L.IE);
8167             BasePtr = Ptr;
8168             // Entry is RETURN_PARAM. Also, set the placeholder value
8169             // MEMBER_OF=FFFF so that the entry is later updated with the
8170             // correct value of MEMBER_OF.
8171             CurTypes.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8172           } else {
8173             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8174             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8175                                              L.IE->getExprLoc());
8176             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8177             // value MEMBER_OF=FFFF so that the entry is later updated with the
8178             // correct value of MEMBER_OF.
8179             CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8180                                OMP_MAP_MEMBER_OF);
8181           }
8182           CurBasePointers.emplace_back(BasePtr, L.VD);
8183           CurPointers.push_back(Ptr);
8184           CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8185         }
8186       }
8187 
8188       // If there is an entry in PartialStruct it means we have a struct with
8189       // individual members mapped. Emit an extra combined entry.
8190       if (PartialStruct.Base.isValid())
8191         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8192                           PartialStruct);
8193 
8194       // We need to append the results of this capture to what we already have.
8195       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8196       Pointers.append(CurPointers.begin(), CurPointers.end());
8197       Sizes.append(CurSizes.begin(), CurSizes.end());
8198       Types.append(CurTypes.begin(), CurTypes.end());
8199     }
8200   }
8201 
8202   /// Generate all the base pointers, section pointers, sizes and map types for
8203   /// the extracted map clauses of user-defined mapper.
8204   void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8205                                 MapValuesArrayTy &Pointers,
8206                                 MapValuesArrayTy &Sizes,
8207                                 MapFlagsArrayTy &Types) const {
8208     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8209            "Expect a declare mapper directive");
8210     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8211     // We have to process the component lists that relate with the same
8212     // declaration in a single chunk so that we can generate the map flags
8213     // correctly. Therefore, we organize all lists in a map.
8214     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8215 
8216     // Helper function to fill the information map for the different supported
8217     // clauses.
8218     auto &&InfoGen = [&Info](
8219         const ValueDecl *D,
8220         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8221         OpenMPMapClauseKind MapType,
8222         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8223         bool ReturnDevicePointer, bool IsImplicit) {
8224       const ValueDecl *VD =
8225           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8226       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8227                             IsImplicit);
8228     };
8229 
8230     for (const auto *C : CurMapperDir->clauselists()) {
8231       const auto *MC = cast<OMPMapClause>(C);
8232       for (const auto L : MC->component_lists()) {
8233         InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8234                 /*ReturnDevicePointer=*/false, MC->isImplicit());
8235       }
8236     }
8237 
8238     for (const auto &M : Info) {
8239       // We need to know when we generate information for the first component
8240       // associated with a capture, because the mapping flags depend on it.
8241       bool IsFirstComponentList = true;
8242 
8243       // Temporary versions of arrays
8244       MapBaseValuesArrayTy CurBasePointers;
8245       MapValuesArrayTy CurPointers;
8246       MapValuesArrayTy CurSizes;
8247       MapFlagsArrayTy CurTypes;
8248       StructRangeInfoTy PartialStruct;
8249 
8250       for (const MapInfo &L : M.second) {
8251         assert(!L.Components.empty() &&
8252                "Not expecting declaration with no component lists.");
8253         generateInfoForComponentList(
8254             L.MapType, L.MapModifiers, L.Components, CurBasePointers,
8255             CurPointers, CurSizes, CurTypes, PartialStruct,
8256             IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
8257         IsFirstComponentList = false;
8258       }
8259 
8260       // If there is an entry in PartialStruct it means we have a struct with
8261       // individual members mapped. Emit an extra combined entry.
8262       if (PartialStruct.Base.isValid())
8263         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8264                           PartialStruct);
8265 
8266       // We need to append the results of this capture to what we already have.
8267       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8268       Pointers.append(CurPointers.begin(), CurPointers.end());
8269       Sizes.append(CurSizes.begin(), CurSizes.end());
8270       Types.append(CurTypes.begin(), CurTypes.end());
8271     }
8272   }
8273 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// If \a VD is a lambda object, one map entry is pushed for the captured
  /// 'this' (if any) and one per variable captured by reference (or a by-copy
  /// captured pointer). For every emitted entry, the mapping from the
  /// capture field's address to the lambda's own address is recorded in
  /// \a LambdaPointers so that the MEMBER_OF index can be fixed up later
  /// (see adjustMemberOfForLambdaCaptures).
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambda objects need this treatment; anything else returns early.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Emit an entry for the field holding the captured 'this' pointer.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      BasePointers.push_back(ThisLVal.getPointer(CGF));
      Pointers.push_back(ThisLValVal.getPointer(CGF));
      // Size is the size of a pointer (void*).
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      // By-copy captures only matter here if they hold a pointer.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-ref capture: map the referenced object with its full size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        BasePointers.push_back(VarLVal.getPointer(CGF));
        Pointers.push_back(VarLValVal.getPointer(CGF));
        Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy captured pointer: use the loaded pointer value with a zero
        // size so only the pointer itself is handled, not its pointee.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        BasePointers.push_back(VarLVal.getPointer(CGF));
        Pointers.push_back(VarRVal.getScalarVal());
        Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }
8337 
8338   /// Set correct indices for lambdas captures.
8339   void adjustMemberOfForLambdaCaptures(
8340       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8341       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8342       MapFlagsArrayTy &Types) const {
8343     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8344       // Set correct member_of idx for all implicit lambda captures.
8345       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8346                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8347         continue;
8348       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8349       assert(BasePtr && "Unable to find base lambda address.");
8350       int TgtIdx = -1;
8351       for (unsigned J = I; J > 0; --J) {
8352         unsigned Idx = J - 1;
8353         if (Pointers[Idx] != BasePtr)
8354           continue;
8355         TgtIdx = Idx;
8356         break;
8357       }
8358       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8359       // All other current entries will be MEMBER_OF the combined entry
8360       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8361       // 0xFFFF in the MEMBER_OF field).
8362       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8363       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8364     }
8365   }
8366 
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture \a Cap, whose corresponding kernel
  /// argument is \a Arg. \a PartialStruct is filled when individual members
  /// of a struct are mapped, so the caller can emit the extra combined entry.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // The information for the 'this' capture is keyed with a null declaration.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // Collect every map-clause component list that refers to this capture's
    // declaration, together with the clause's map type, modifiers and
    // implicitness.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      // Compare L against every list after it, so each pair is visited once.
      // Note: only Components1 is needed from L1; MapType, MapModifiers and
      // IsImplicit are reused here purely as scratch for the unpacking.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        // Walk both component lists from the innermost component outward and
        // stop at the first difference.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The shorter list is the base; the longer one describes the
          // overlapped sub-object.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Compute the flattened field layout once; it supplies the relative
      // order of fields that come from different parents.
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Order component lists by the declaration order of the first
            // field in which they differ.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // Fields belong to different parents: fall back to the
            // precomputed plain layout order.
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Emit the entries that have overlapped elements first; the overlap
    // information lets generateInfoForComponentList split the base entry
    // around the separately mapped sub-objects.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(
          MapType, MapModifiers, Components, BasePointers, Pointers, Sizes,
          Types, PartialStruct, IsFirstComponentList, IsImplicit,
          /*ForDeviceAddr=*/false, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
8544 
8545   /// Generate the base pointers, section pointers, sizes and map types
8546   /// associated with the declare target link variables.
8547   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8548                                         MapValuesArrayTy &Pointers,
8549                                         MapValuesArrayTy &Sizes,
8550                                         MapFlagsArrayTy &Types) const {
8551     assert(CurDir.is<const OMPExecutableDirective *>() &&
8552            "Expect a executable directive");
8553     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8554     // Map other list items in the map clause which are not captured variables
8555     // but "declare target link" global variables.
8556     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8557       for (const auto L : C->component_lists()) {
8558         if (!L.first)
8559           continue;
8560         const auto *VD = dyn_cast<VarDecl>(L.first);
8561         if (!VD)
8562           continue;
8563         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8564             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8565         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8566             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8567           continue;
8568         StructRangeInfoTy PartialStruct;
8569         generateInfoForComponentList(
8570             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8571             Pointers, Sizes, Types, PartialStruct,
8572             /*IsFirstComponentList=*/true, C->isImplicit());
8573         assert(!PartialStruct.Base.isValid() &&
8574                "No partial structs for declare target link expected.");
8575       }
8576     }
8577   }
8578 
  /// Generate the default (implicit) map information for a given capture
  /// \a CI, record field declaration \a RI and captured value \a CV.
  ///
  /// Appends exactly one entry to each of the four output arrays. Every
  /// entry is flagged OMP_MAP_TARGET_PARAM (it becomes a kernel argument)
  /// and, unless FirstPrivateDecls says otherwise, OMP_MAP_IMPLICIT.
  ///
  /// \param CI Capture being mapped ('this', by-copy, or by-reference).
  /// \param RI Field of the capture record holding the captured value.
  /// \param CV The captured value itself.
  /// \param CurBasePointers [out] Base pointer of the generated map entry.
  /// \param CurPointers [out] Begin pointer of the generated map entry.
  /// \param CurSizes [out] Size in bytes (Int64) of the generated entry.
  /// \param CurMapTypes [out] Map-type flags of the generated entry.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    // Assume an implicit map unless FirstPrivateDecls records an explicit
    // firstprivate entry for this capture below.
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this': map the pointed-to object with 'tofrom' semantics. The size
      // is that of the class, i.e. the pointee type of the record field.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A by-copy capture may still come from an explicit firstprivate
      // clause; FirstPrivateDecls stores whether it was implicit.
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Firstprivate of a const-qualified variable: register a global
        // copy and map that instead of the captured address.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer: the begin pointer is the pointee address
          // loaded through the captured reference.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
8662 };
8663 } // anonymous namespace
8664 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// the array fields of \a Info are left cleared (null).
///
/// Produces stack temporaries for the base-pointer and pointer arrays
/// (filled element by element below), a stack temporary or a constant
/// global for the sizes (depending on whether any size needs runtime
/// evaluation), and always a constant global for the map types.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Fill the base-pointer and pointer arrays (and, if needed, the sizes
    // array) one element at a time with stores.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      // Cast the slot to the stored value's pointer type before storing.
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember where a use_device_ptr base lives so the caller can later
      // privatize the declaration with the runtime-translated address.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
8783 
8784 /// Emit the arguments to be passed to the runtime library based on the
8785 /// arrays of pointers, sizes and map types.
8786 static void emitOffloadingArraysArgument(
8787     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8788     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8789     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8790   CodeGenModule &CGM = CGF.CGM;
8791   if (Info.NumberOfPtrs) {
8792     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8793         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8794         Info.BasePointersArray,
8795         /*Idx0=*/0, /*Idx1=*/0);
8796     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8797         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8798         Info.PointersArray,
8799         /*Idx0=*/0,
8800         /*Idx1=*/0);
8801     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8802         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8803         /*Idx0=*/0, /*Idx1=*/0);
8804     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8805         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8806         Info.MapTypesArray,
8807         /*Idx0=*/0,
8808         /*Idx1=*/0);
8809   } else {
8810     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8811     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8812     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8813     MapTypesArrayArg =
8814         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8815   }
8816 }
8817 
8818 /// Check for inner distribute directive.
8819 static const OMPExecutableDirective *
8820 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8821   const auto *CS = D.getInnermostCapturedStmt();
8822   const auto *Body =
8823       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8824   const Stmt *ChildStmt =
8825       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8826 
8827   if (const auto *NestedDir =
8828           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8829     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8830     switch (D.getDirectiveKind()) {
8831     case OMPD_target:
8832       if (isOpenMPDistributeDirective(DKind))
8833         return NestedDir;
8834       if (DKind == OMPD_teams) {
8835         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8836             /*IgnoreCaptured=*/true);
8837         if (!Body)
8838           return nullptr;
8839         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8840         if (const auto *NND =
8841                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8842           DKind = NND->getDirectiveKind();
8843           if (isOpenMPDistributeDirective(DKind))
8844             return NND;
8845         }
8846       }
8847       return nullptr;
8848     case OMPD_target_teams:
8849       if (isOpenMPDistributeDirective(DKind))
8850         return NestedDir;
8851       return nullptr;
8852     case OMPD_target_parallel:
8853     case OMPD_target_simd:
8854     case OMPD_target_parallel_for:
8855     case OMPD_target_parallel_for_simd:
8856       return nullptr;
8857     case OMPD_target_teams_distribute:
8858     case OMPD_target_teams_distribute_simd:
8859     case OMPD_target_teams_distribute_parallel_for:
8860     case OMPD_target_teams_distribute_parallel_for_simd:
8861     case OMPD_parallel:
8862     case OMPD_for:
8863     case OMPD_parallel_for:
8864     case OMPD_parallel_master:
8865     case OMPD_parallel_sections:
8866     case OMPD_for_simd:
8867     case OMPD_parallel_for_simd:
8868     case OMPD_cancel:
8869     case OMPD_cancellation_point:
8870     case OMPD_ordered:
8871     case OMPD_threadprivate:
8872     case OMPD_allocate:
8873     case OMPD_task:
8874     case OMPD_simd:
8875     case OMPD_sections:
8876     case OMPD_section:
8877     case OMPD_single:
8878     case OMPD_master:
8879     case OMPD_critical:
8880     case OMPD_taskyield:
8881     case OMPD_barrier:
8882     case OMPD_taskwait:
8883     case OMPD_taskgroup:
8884     case OMPD_atomic:
8885     case OMPD_flush:
8886     case OMPD_depobj:
8887     case OMPD_scan:
8888     case OMPD_teams:
8889     case OMPD_target_data:
8890     case OMPD_target_exit_data:
8891     case OMPD_target_enter_data:
8892     case OMPD_distribute:
8893     case OMPD_distribute_simd:
8894     case OMPD_distribute_parallel_for:
8895     case OMPD_distribute_parallel_for_simd:
8896     case OMPD_teams_distribute:
8897     case OMPD_teams_distribute_simd:
8898     case OMPD_teams_distribute_parallel_for:
8899     case OMPD_teams_distribute_parallel_for_simd:
8900     case OMPD_target_update:
8901     case OMPD_declare_simd:
8902     case OMPD_declare_variant:
8903     case OMPD_begin_declare_variant:
8904     case OMPD_end_declare_variant:
8905     case OMPD_declare_target:
8906     case OMPD_end_declare_target:
8907     case OMPD_declare_reduction:
8908     case OMPD_declare_mapper:
8909     case OMPD_taskloop:
8910     case OMPD_taskloop_simd:
8911     case OMPD_master_taskloop:
8912     case OMPD_master_taskloop_simd:
8913     case OMPD_parallel_master_taskloop:
8914     case OMPD_parallel_master_taskloop_simd:
8915     case OMPD_requires:
8916     case OMPD_unknown:
8917       llvm_unreachable("Unexpected directive.");
8918     }
8919   }
8920 
8921   return nullptr;
8922 }
8923 
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type) {
///   // Allocate space for an array section first.
///   if (size > 1 && !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Emit each declare-mapper at most once; UDMMap caches the result.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable declared in the 'declare mapper' directive; it gets
  // privatized to each array element inside the loop below.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name encodes the mangled mapped type and the mapper id:
  // .omp_mapper.<type_name>.<mapper_id>.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // PtrPHI tracks the element currently being mapped; its back-edge
  // incoming value (PtrNext) is added after the body is emitted.
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
  MappableExprsHandler::MapValuesArrayTy Pointers;
  MappableExprsHandler::MapValuesArrayTy Sizes;
  MappableExprsHandler::MapFlagsArrayTy MapTypes;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the count into the MEMBER_OF bit-field position so it can be
  // added to each component's map-type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // TODO: call the corresponding mapper function if a user-defined mapper is
    // associated with this map clause.
    // Call the runtime API __tgt_push_mapper_component to fill up the runtime
    // data structure.
    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    MapperCGF.EmitRuntimeCall(
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_push_mapper_component),
        OffloadingArgs);
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, BodyBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the emitted function; also record it per enclosing function so
  // the declaration can be found when emitting that function's body.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9204 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
///
/// \param Handle Runtime mapper handle forwarded to the runtime call.
/// \param Base / \param Begin Base and begin pointers of the section.
/// \param Size Number of elements in the section.
/// \param MapType Map-type flags passed into the mapper function.
/// \param ElementSize Size in bytes of one array element.
/// \param ExitBB Block to branch to when no init/delete code is needed.
/// \param IsInit Selects between initialization and deletion emission.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section (more than one element).
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section. For init the body runs
  // when the delete bit is clear; for deletion, when it is set.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  if (IsInit) {
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
9260 
9261 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9262     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9263     llvm::Value *DeviceID,
9264     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9265                                      const OMPLoopDirective &D)>
9266         SizeEmitter) {
9267   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9268   const OMPExecutableDirective *TD = &D;
9269   // Get nested teams distribute kind directive, if any.
9270   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9271     TD = getNestedDistributeDirective(CGM.getContext(), D);
9272   if (!TD)
9273     return;
9274   const auto *LD = cast<OMPLoopDirective>(TD);
9275   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9276                                                      PrePostActionTy &) {
9277     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9278       llvm::Value *Args[] = {DeviceID, NumIterations};
9279       CGF.EmitRuntimeCall(
9280           llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
9281               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
9282           Args);
9283     }
9284   };
9285   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9286 }
9287 
/// Emit the host-side code for a 'target' region: fill the offloading
/// argument arrays, invoke the __tgt_target*() runtime entry point, and fall
/// back to executing \p OutlinedFn on the host when offloading fails or is
/// disabled (no device ID, failing if-clause, or no target triples).
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A depend clause forces the target region to be wrapped in an outer task,
  // in which case the captured variables must be regenerated inside each
  // code-gen callback (the task body is a different function).
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo/MapTypesArray are filled by TargetThenGen below and read by
  // ThenGen; both lambdas capture them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_teams_nowait
                                         : OMPRTL___tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
              CGM.getModule(),
              HasNowait ? OMPRTL___tgt_target_nowait : OMPRTL___tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Builds the offloading arrays (base pointers, pointers, sizes, map types)
  // from the captured variables and map clauses, then dispatches to ThenGen,
  // either directly or wrapped in a task when a depend clause is present.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk the captures, the captured-record fields, and the captured values
    // in lockstep; all three sequences are parallel by construction.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the arrays through the by-reference captures so ThenGen can
    // read them when it runs (possibly inside a generated task).
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9581 
/// Recursively scan \p S for OpenMP target execution directives and emit the
/// corresponding device functions, using \p ParentName for kernel-name
/// mangling.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the dedicated device-function emitter for each target
    // execution directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target execution directives, so
    // they cannot reach this switch (guarded by RequiresDeviceCodegen above).
    // They are listed explicitly to keep the switch exhaustive.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // For any other OpenMP directive, only its associated statement can contain
  // nested target regions.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9729 
9730 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9731   // If emitting code for the host, we do not process FD here. Instead we do
9732   // the normal code generation.
9733   if (!CGM.getLangOpts().OpenMPIsDevice) {
9734     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9735       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9736           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9737       // Do not emit device_type(nohost) functions for the host.
9738       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9739         return true;
9740     }
9741     return false;
9742   }
9743 
9744   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9745   // Try to detect target regions in the function.
9746   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9747     StringRef Name = CGM.getMangledName(GD);
9748     scanForTargetRegionsFunctions(FD->getBody(), Name);
9749     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9750         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9751     // Do not emit device_type(nohost) functions for the host.
9752     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9753       return true;
9754   }
9755 
9756   // Do not to emit function if it is not marked as declare target.
9757   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9758          AlreadyEmittedTargetDecls.count(VD) == 0;
9759 }
9760 
9761 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9762   if (!CGM.getLangOpts().OpenMPIsDevice)
9763     return false;
9764 
9765   // Check if there are Ctors/Dtors in this declaration and look for target
9766   // regions in it. We use the complete variant to produce the kernel name
9767   // mangling.
9768   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9769   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9770     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9771       StringRef ParentName =
9772           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9773       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9774     }
9775     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9776       StringRef ParentName =
9777           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9778       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9779     }
9780   }
9781 
9782   // Do not to emit variable if it is not marked as declare target.
9783   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9784       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9785           cast<VarDecl>(GD.getDecl()));
9786   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9787       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9788        HasRequiresUnifiedSharedMemory)) {
9789     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9790     return true;
9791   }
9792   return false;
9793 }
9794 
9795 llvm::Constant *
9796 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9797                                                 const VarDecl *VD) {
9798   assert(VD->getType().isConstant(CGM.getContext()) &&
9799          "Expected constant variable.");
9800   StringRef VarName;
9801   llvm::Constant *Addr;
9802   llvm::GlobalValue::LinkageTypes Linkage;
9803   QualType Ty = VD->getType();
9804   SmallString<128> Buffer;
9805   {
9806     unsigned DeviceID;
9807     unsigned FileID;
9808     unsigned Line;
9809     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9810                              FileID, Line);
9811     llvm::raw_svector_ostream OS(Buffer);
9812     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9813        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9814     VarName = OS.str();
9815   }
9816   Linkage = llvm::GlobalValue::InternalLinkage;
9817   Addr =
9818       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9819                                   getDefaultFirstprivateAddressSpace());
9820   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9821   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9822   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9823   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9824       VarName, Addr, VarSize,
9825       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9826   return Addr;
9827 }
9828 
9829 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9830                                                    llvm::Constant *Addr) {
9831   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9832       !CGM.getLangOpts().OpenMPIsDevice)
9833     return;
9834   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9835       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9836   if (!Res) {
9837     if (CGM.getLangOpts().OpenMPIsDevice) {
9838       // Register non-target variables being emitted in device code (debug info
9839       // may cause this).
9840       StringRef VarName = CGM.getMangledName(VD);
9841       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9842     }
9843     return;
9844   }
9845   // Register declare target variables.
9846   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9847   StringRef VarName;
9848   CharUnits VarSize;
9849   llvm::GlobalValue::LinkageTypes Linkage;
9850 
9851   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9852       !HasRequiresUnifiedSharedMemory) {
9853     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9854     VarName = CGM.getMangledName(VD);
9855     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9856       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9857       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9858     } else {
9859       VarSize = CharUnits::Zero();
9860     }
9861     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9862     // Temp solution to prevent optimizations of the internal variables.
9863     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9864       std::string RefName = getName({VarName, "ref"});
9865       if (!CGM.GetGlobalValue(RefName)) {
9866         llvm::Constant *AddrRef =
9867             getOrCreateInternalVariable(Addr->getType(), RefName);
9868         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9869         GVAddrRef->setConstant(/*Val=*/true);
9870         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9871         GVAddrRef->setInitializer(Addr);
9872         CGM.addCompilerUsedGlobal(GVAddrRef);
9873       }
9874     }
9875   } else {
9876     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9877             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9878              HasRequiresUnifiedSharedMemory)) &&
9879            "Declare target attribute must link or to with unified memory.");
9880     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9881       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9882     else
9883       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9884 
9885     if (CGM.getLangOpts().OpenMPIsDevice) {
9886       VarName = Addr->getName();
9887       Addr = nullptr;
9888     } else {
9889       VarName = getAddrOfDeclareTargetVar(VD).getName();
9890       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9891     }
9892     VarSize = CGM.getPointerSize();
9893     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9894   }
9895 
9896   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9897       VarName, Addr, VarSize, Flags, Linkage);
9898 }
9899 
9900 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9901   if (isa<FunctionDecl>(GD.getDecl()) ||
9902       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9903     return emitTargetFunctions(GD);
9904 
9905   return emitTargetGlobalVariable(GD);
9906 }
9907 
9908 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9909   for (const VarDecl *VD : DeferredGlobalVariables) {
9910     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9911         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9912     if (!Res)
9913       continue;
9914     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9915         !HasRequiresUnifiedSharedMemory) {
9916       CGM.EmitGlobal(VD);
9917     } else {
9918       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9919               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9920                HasRequiresUnifiedSharedMemory)) &&
9921              "Expected link clause or to clause with unified memory.");
9922       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9923     }
9924   }
9925 }
9926 
9927 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9928     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9929   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9930          " Expected target-based directive.");
9931 }
9932 
9933 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
9934   for (const OMPClause *Clause : D->clauselists()) {
9935     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9936       HasRequiresUnifiedSharedMemory = true;
9937     } else if (const auto *AC =
9938                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
9939       switch (AC->getAtomicDefaultMemOrderKind()) {
9940       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
9941         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
9942         break;
9943       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
9944         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
9945         break;
9946       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
9947         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
9948         break;
9949       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
9950         break;
9951       }
9952     }
9953   }
9954 }
9955 
/// Returns the atomic ordering selected by an 'atomic_default_mem_order'
/// clause on a 'requires' directive (recorded by processRequiresDirective),
/// or the stored default if no such clause was seen.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
9959 
9960 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9961                                                        LangAS &AS) {
9962   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9963     return false;
9964   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9965   switch(A->getAllocatorType()) {
9966   case OMPAllocateDeclAttr::OMPNullMemAlloc:
9967   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9968   // Not supported, fallback to the default mem space.
9969   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9970   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9971   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9972   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9973   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9974   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9975   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9976     AS = LangAS::Default;
9977     return true;
9978   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9979     llvm_unreachable("Expected predefined allocator for the variables with the "
9980                      "static storage.");
9981   }
9982   return false;
9983 }
9984 
/// Returns true if a 'requires' directive with a 'unified_shared_memory'
/// clause has been processed in this translation unit.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
9988 
/// On the device, suspend automatic marking of declarations as globals for
/// the target for the lifetime of this RAII object, saving the previous
/// setting so the destructor can restore it. No-op on the host.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}
9997 
/// Restore the marking mode saved by the constructor (device only).
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
10002 
/// Decide whether a function needs to be emitted for the device. Returns
/// true when the declaration should NOT be (re-)emitted; false when it still
/// needs emission. Also records first-time emission in
/// AlreadyEmittedTargetDecls for non-declare-target functions.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // Host compilation, or marking disabled (see DisableAutoDeclareTargetRAII):
  // nothing to track.
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // If an LLVM function already exists, only re-emit when it is still
      // just a declaration.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // First insertion returns true -> function still needs to be emitted.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}
10022 
/// Create the global constructor-like function that registers the 'requires'
/// flags with the offloading runtime (__tgt_register_requires). Returns
/// nullptr when no registration is needed (no offloading targets, simd-only
/// mode, device compilation, or no target regions/entries in this TU).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    // Build a void() function named "omp_offloading.requires_reg" (per the
    // module's name separator) with global-init linkage/attributes.
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
10064 
10065 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10066                                     const OMPExecutableDirective &D,
10067                                     SourceLocation Loc,
10068                                     llvm::Function *OutlinedFn,
10069                                     ArrayRef<llvm::Value *> CapturedVars) {
10070   if (!CGF.HaveInsertPoint())
10071     return;
10072 
10073   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10074   CodeGenFunction::RunCleanupsScope Scope(CGF);
10075 
10076   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10077   llvm::Value *Args[] = {
10078       RTLoc,
10079       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10080       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10081   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10082   RealArgs.append(std::begin(Args), std::end(Args));
10083   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10084 
10085   llvm::FunctionCallee RTLFn =
10086       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
10087           CGM.getModule(), OMPRTL___kmpc_fork_teams);
10088   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10089 }
10090 
/// Emit the runtime call that communicates 'num_teams' and 'thread_limit'
/// clause values to the runtime. A null clause expression is encoded as 0,
/// letting the runtime pick its default.
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // Evaluate the clause expressions (if present) and truncate/extend to i32.
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
10119 
/// Emit the code for a 'target data' region: a __tgt_target_data_begin /
/// __tgt_target_data_end pair bracketing the region body, honoring optional
/// 'if' and 'device' clauses. When device pointers must be privatized, the
/// body is emitted twice (with and without privatization) in the 'then' and
/// 'else' branches respectively; otherwise it is emitted once between the
/// two runtime calls.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    // Reuse the arrays built by BeginThenGen (tracked through Info).
    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10248 
/// Emit the runtime call for a standalone target data directive: 'target
/// enter data', 'target exit data', or 'target update'. Handles optional
/// 'if', 'device', 'nowait', and 'depend' clauses; with 'depend' the call is
/// emitted as a target task-based directive.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo/MapTypesArray are filled by TargetThenGen below and read by
  // ThenGen, which emits the actual runtime call.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait
                        : OMPRTL___tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait
                        : OMPRTL___tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait
                        : OMPRTL___tgt_target_data_update;
      break;
    // All remaining directive kinds are rejected by the assertion at the top
    // of the function; list them explicitly so the switch stays exhaustive.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), RTLFn),
                        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the array addresses to the captures read by ThenGen.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    // With an 'if' clause, the directive is a no-op when the condition is
    // false, hence the empty 'else' generator.
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10413 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; defaults to Vector.
    ParamKindTy Kind = Vector;
    /// Linear step or stride-argument value for linear parameters; zero when
    /// unset.
    llvm::APSInt StrideOrArg;
    /// Requested alignment; zero when no alignment was specified.
    llvm::APSInt Alignment;
  };
} // namespace
10424 
10425 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10426                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10427   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10428   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10429   // of that clause. The VLEN value must be power of 2.
10430   // In other case the notion of the function`s "characteristic data type" (CDT)
10431   // is used to compute the vector length.
10432   // CDT is defined in the following order:
10433   //   a) For non-void function, the CDT is the return type.
10434   //   b) If the function has any non-uniform, non-linear parameters, then the
10435   //   CDT is the type of the first such parameter.
10436   //   c) If the CDT determined by a) or b) above is struct, union, or class
10437   //   type which is pass-by-value (except for the type that maps to the
10438   //   built-in complex data type), the characteristic data type is int.
10439   //   d) If none of the above three cases is applicable, the CDT is int.
10440   // The VLEN is then determined based on the CDT and the size of vector
10441   // register of that ISA for which current vector version is generated. The
10442   // VLEN is computed using the formula below:
10443   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10444   // where vector register size specified in section 3.2.1 Registers and the
10445   // Stack Frame of original AMD64 ABI document.
10446   QualType RetType = FD->getReturnType();
10447   if (RetType.isNull())
10448     return 0;
10449   ASTContext &C = FD->getASTContext();
10450   QualType CDT;
10451   if (!RetType.isNull() && !RetType->isVoidType()) {
10452     CDT = RetType;
10453   } else {
10454     unsigned Offset = 0;
10455     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10456       if (ParamAttrs[Offset].Kind == Vector)
10457         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10458       ++Offset;
10459     }
10460     if (CDT.isNull()) {
10461       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10462         if (ParamAttrs[I + Offset].Kind == Vector) {
10463           CDT = FD->getParamDecl(I)->getType();
10464           break;
10465         }
10466       }
10467     }
10468   }
10469   if (CDT.isNull())
10470     CDT = C.IntTy;
10471   CDT = CDT->getCanonicalTypeUnqualified();
10472   if (CDT->isRecordType() || CDT->isUnionType())
10473     CDT = C.IntTy;
10474   return C.getTypeSize(CDT);
10475 }
10476 
10477 static void
10478 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10479                            const llvm::APSInt &VLENVal,
10480                            ArrayRef<ParamAttrTy> ParamAttrs,
10481                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10482   struct ISADataTy {
10483     char ISA;
10484     unsigned VecRegSize;
10485   };
10486   ISADataTy ISAData[] = {
10487       {
10488           'b', 128
10489       }, // SSE
10490       {
10491           'c', 256
10492       }, // AVX
10493       {
10494           'd', 256
10495       }, // AVX2
10496       {
10497           'e', 512
10498       }, // AVX512
10499   };
10500   llvm::SmallVector<char, 2> Masked;
10501   switch (State) {
10502   case OMPDeclareSimdDeclAttr::BS_Undefined:
10503     Masked.push_back('N');
10504     Masked.push_back('M');
10505     break;
10506   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10507     Masked.push_back('N');
10508     break;
10509   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10510     Masked.push_back('M');
10511     break;
10512   }
10513   for (char Mask : Masked) {
10514     for (const ISADataTy &Data : ISAData) {
10515       SmallString<256> Buffer;
10516       llvm::raw_svector_ostream Out(Buffer);
10517       Out << "_ZGV" << Data.ISA << Mask;
10518       if (!VLENVal) {
10519         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10520         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10521         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10522       } else {
10523         Out << VLENVal;
10524       }
10525       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10526         switch (ParamAttr.Kind){
10527         case LinearWithVarStride:
10528           Out << 's' << ParamAttr.StrideOrArg;
10529           break;
10530         case Linear:
10531           Out << 'l';
10532           if (ParamAttr.StrideOrArg != 1)
10533             Out << ParamAttr.StrideOrArg;
10534           break;
10535         case Uniform:
10536           Out << 'u';
10537           break;
10538         case Vector:
10539           Out << 'v';
10540           break;
10541         }
10542         if (!!ParamAttr.Alignment)
10543           Out << 'a' << ParamAttr.Alignment;
10544       }
10545       Out << '_' << Fn->getName();
10546       Fn->addFnAttr(Out.str());
10547     }
10548   }
10549 }
10550 
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10556 
10557 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10558 ///
10559 /// TODO: Need to implement the behavior for reference marked with a
10560 /// var or no linear modifiers (1.b in the section). For this, we
10561 /// need to extend ParamKindTy to support the linear modifiers.
10562 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10563   QT = QT.getCanonicalType();
10564 
10565   if (QT->isVoidType())
10566     return false;
10567 
10568   if (Kind == ParamKindTy::Uniform)
10569     return false;
10570 
10571   if (Kind == ParamKindTy::Linear)
10572     return false;
10573 
10574   // TODO: Handle linear references with modifiers
10575 
10576   if (Kind == ParamKindTy::LinearWithVarStride)
10577     return false;
10578 
10579   return true;
10580 }
10581 
10582 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10583 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10584   QT = QT.getCanonicalType();
10585   unsigned Size = C.getTypeSize(QT);
10586 
10587   // Only scalars and complex within 16 bytes wide set PVB to true.
10588   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10589     return false;
10590 
10591   if (QT->isFloatingType())
10592     return true;
10593 
10594   if (QT->isIntegerType())
10595     return true;
10596 
10597   if (QT->isPointerType())
10598     return true;
10599 
10600   // TODO: Add support for complex types (section 3.1.2, item 2).
10601 
10602   return false;
10603 }
10604 
10605 /// Computes the lane size (LS) of a return type or of an input parameter,
10606 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10607 /// TODO: Add support for references, section 3.2.1, item 1.
10608 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10609   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10610     QualType PTy = QT.getCanonicalType()->getPointeeType();
10611     if (getAArch64PBV(PTy, C))
10612       return C.getTypeSize(PTy);
10613   }
10614   if (getAArch64PBV(QT, C))
10615     return C.getTypeSize(QT);
10616 
10617   return C.getTypeSize(C.getUIntPtrType());
10618 }
10619 
10620 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10621 // signature of the scalar function, as defined in 3.2.2 of the
10622 // AAVFABI.
10623 static std::tuple<unsigned, unsigned, bool>
10624 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10625   QualType RetType = FD->getReturnType().getCanonicalType();
10626 
10627   ASTContext &C = FD->getASTContext();
10628 
10629   bool OutputBecomesInput = false;
10630 
10631   llvm::SmallVector<unsigned, 8> Sizes;
10632   if (!RetType->isVoidType()) {
10633     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10634     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10635       OutputBecomesInput = true;
10636   }
10637   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10638     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10639     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10640   }
10641 
10642   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10643   // The LS of a function parameter / return value can only be a power
10644   // of 2, starting from 8 bits, up to 128.
10645   assert(std::all_of(Sizes.begin(), Sizes.end(),
10646                      [](unsigned Size) {
10647                        return Size == 8 || Size == 16 || Size == 32 ||
10648                               Size == 64 || Size == 128;
10649                      }) &&
10650          "Invalid size");
10651 
10652   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10653                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10654                          OutputBecomesInput);
10655 }
10656 
10657 /// Mangle the parameter part of the vector function name according to
10658 /// their OpenMP classification. The mangling function is defined in
10659 /// section 3.5 of the AAVFABI.
10660 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10661   SmallString<256> Buffer;
10662   llvm::raw_svector_ostream Out(Buffer);
10663   for (const auto &ParamAttr : ParamAttrs) {
10664     switch (ParamAttr.Kind) {
10665     case LinearWithVarStride:
10666       Out << "ls" << ParamAttr.StrideOrArg;
10667       break;
10668     case Linear:
10669       Out << 'l';
10670       // Don't print the step value if it is not present or if it is
10671       // equal to 1.
10672       if (ParamAttr.StrideOrArg != 1)
10673         Out << ParamAttr.StrideOrArg;
10674       break;
10675     case Uniform:
10676       Out << 'u';
10677       break;
10678     case Vector:
10679       Out << 'v';
10680       break;
10681     }
10682 
10683     if (!!ParamAttr.Alignment)
10684       Out << 'a' << ParamAttr.Alignment;
10685   }
10686 
10687   return std::string(Out.str());
10688 }
10689 
10690 // Function used to add the attribute. The parameter `VLEN` is
10691 // templated to allow the use of "x" when targeting scalable functions
10692 // for SVE.
10693 template <typename T>
10694 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10695                                  char ISA, StringRef ParSeq,
10696                                  StringRef MangledName, bool OutputBecomesInput,
10697                                  llvm::Function *Fn) {
10698   SmallString<256> Buffer;
10699   llvm::raw_svector_ostream Out(Buffer);
10700   Out << Prefix << ISA << LMask << VLEN;
10701   if (OutputBecomesInput)
10702     Out << "v";
10703   Out << ParSeq << "_" << MangledName;
10704   Fn->addFnAttr(Out.str());
10705 }
10706 
10707 // Helper function to generate the Advanced SIMD names depending on
10708 // the value of the NDS when simdlen is not present.
10709 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10710                                       StringRef Prefix, char ISA,
10711                                       StringRef ParSeq, StringRef MangledName,
10712                                       bool OutputBecomesInput,
10713                                       llvm::Function *Fn) {
10714   switch (NDS) {
10715   case 8:
10716     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10717                          OutputBecomesInput, Fn);
10718     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10719                          OutputBecomesInput, Fn);
10720     break;
10721   case 16:
10722     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10723                          OutputBecomesInput, Fn);
10724     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10725                          OutputBecomesInput, Fn);
10726     break;
10727   case 32:
10728     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10729                          OutputBecomesInput, Fn);
10730     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10731                          OutputBecomesInput, Fn);
10732     break;
10733   case 64:
10734   case 128:
10735     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10736                          OutputBecomesInput, Fn);
10737     break;
10738   default:
10739     llvm_unreachable("Scalar type is too wide.");
10740   }
10741 }
10742 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
///
/// \param UserVLEN value of the `simdlen` clause, or 0 when absent.
/// \param State value of the `[not]inbranch` clause.
/// \param MangledName name of the scalar function being vectorized.
/// \param ISA 'n' for Advanced SIMD, 's' for SVE.
/// \param VecRegSize vector register width in bits; unused in this body —
///        presumably reserved for future use, TODO confirm.
/// \param SLoc location used for the user-facing diagnostics below.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    // Total vector width is UserVLEN lanes of the widest data size (WDS);
    // it must be a multiple of 128 bits, at most 2048 bits.
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        // No clause: emit both the unmasked ("N") and masked ("M") variants.
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      // "x" marks a scalable (vector-length agnostic) signature.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
10851 
10852 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10853                                               llvm::Function *Fn) {
10854   ASTContext &C = CGM.getContext();
10855   FD = FD->getMostRecentDecl();
10856   // Map params to their positions in function decl.
10857   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10858   if (isa<CXXMethodDecl>(FD))
10859     ParamPositions.try_emplace(FD, 0);
10860   unsigned ParamPos = ParamPositions.size();
10861   for (const ParmVarDecl *P : FD->parameters()) {
10862     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10863     ++ParamPos;
10864   }
10865   while (FD) {
10866     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10867       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10868       // Mark uniform parameters.
10869       for (const Expr *E : Attr->uniforms()) {
10870         E = E->IgnoreParenImpCasts();
10871         unsigned Pos;
10872         if (isa<CXXThisExpr>(E)) {
10873           Pos = ParamPositions[FD];
10874         } else {
10875           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10876                                 ->getCanonicalDecl();
10877           Pos = ParamPositions[PVD];
10878         }
10879         ParamAttrs[Pos].Kind = Uniform;
10880       }
10881       // Get alignment info.
10882       auto NI = Attr->alignments_begin();
10883       for (const Expr *E : Attr->aligneds()) {
10884         E = E->IgnoreParenImpCasts();
10885         unsigned Pos;
10886         QualType ParmTy;
10887         if (isa<CXXThisExpr>(E)) {
10888           Pos = ParamPositions[FD];
10889           ParmTy = E->getType();
10890         } else {
10891           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10892                                 ->getCanonicalDecl();
10893           Pos = ParamPositions[PVD];
10894           ParmTy = PVD->getType();
10895         }
10896         ParamAttrs[Pos].Alignment =
10897             (*NI)
10898                 ? (*NI)->EvaluateKnownConstInt(C)
10899                 : llvm::APSInt::getUnsigned(
10900                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10901                           .getQuantity());
10902         ++NI;
10903       }
10904       // Mark linear parameters.
10905       auto SI = Attr->steps_begin();
10906       auto MI = Attr->modifiers_begin();
10907       for (const Expr *E : Attr->linears()) {
10908         E = E->IgnoreParenImpCasts();
10909         unsigned Pos;
10910         // Rescaling factor needed to compute the linear parameter
10911         // value in the mangled name.
10912         unsigned PtrRescalingFactor = 1;
10913         if (isa<CXXThisExpr>(E)) {
10914           Pos = ParamPositions[FD];
10915         } else {
10916           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10917                                 ->getCanonicalDecl();
10918           Pos = ParamPositions[PVD];
10919           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
10920             PtrRescalingFactor = CGM.getContext()
10921                                      .getTypeSizeInChars(P->getPointeeType())
10922                                      .getQuantity();
10923         }
10924         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10925         ParamAttr.Kind = Linear;
10926         // Assuming a stride of 1, for `linear` without modifiers.
10927         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
10928         if (*SI) {
10929           Expr::EvalResult Result;
10930           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10931             if (const auto *DRE =
10932                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10933               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10934                 ParamAttr.Kind = LinearWithVarStride;
10935                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10936                     ParamPositions[StridePVD->getCanonicalDecl()]);
10937               }
10938             }
10939           } else {
10940             ParamAttr.StrideOrArg = Result.Val.getInt();
10941           }
10942         }
10943         // If we are using a linear clause on a pointer, we need to
10944         // rescale the value of linear_step with the byte size of the
10945         // pointee type.
10946         if (Linear == ParamAttr.Kind)
10947           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
10948         ++SI;
10949         ++MI;
10950       }
10951       llvm::APSInt VLENVal;
10952       SourceLocation ExprLoc;
10953       const Expr *VLENExpr = Attr->getSimdlen();
10954       if (VLENExpr) {
10955         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10956         ExprLoc = VLENExpr->getExprLoc();
10957       }
10958       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10959       if (CGM.getTriple().isX86()) {
10960         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10961       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10962         unsigned VLEN = VLENVal.getExtValue();
10963         StringRef MangledName = Fn->getName();
10964         if (CGM.getTarget().hasFeature("sve"))
10965           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10966                                          MangledName, 's', 128, Fn, ExprLoc);
10967         if (CGM.getTarget().hasFeature("neon"))
10968           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10969                                          MangledName, 'n', 128, Fn, ExprLoc);
10970       }
10971     }
10972     FD = FD->getPreviousDecl();
10973   }
10974 }
10975 
10976 namespace {
10977 /// Cleanup action for doacross support.
10978 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10979 public:
10980   static const int DoacrossFinArgs = 2;
10981 
10982 private:
10983   llvm::FunctionCallee RTLFn;
10984   llvm::Value *Args[DoacrossFinArgs];
10985 
10986 public:
10987   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10988                     ArrayRef<llvm::Value *> CallArgs)
10989       : RTLFn(RTLFn) {
10990     assert(CallArgs.size() == DoacrossFinArgs);
10991     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10992   }
10993   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10994     if (!CGF.HaveInsertPoint())
10995       return;
10996     CGF.EmitRuntimeCall(RTLFn, Args);
10997   }
10998 };
10999 } // namespace
11000 
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    // The kmp_dim record was already built on a previous call; reuse it.
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // Allocate one kmp_dim per loop dimension.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data. The lower bound is left as 0 from the null
  // initialization above; only 'up' and 'st' are stored explicitly.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // Convert the iteration count to kmp_int64 before storing.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register a cleanup that calls __kmpc_doacross_fini on both the normal
  // and the exceptional exit paths of the region.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
11073 
11074 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11075                                           const OMPDependClause *C) {
11076   QualType Int64Ty =
11077       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11078   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11079   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11080       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11081   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11082   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11083     const Expr *CounterVal = C->getLoopData(I);
11084     assert(CounterVal);
11085     llvm::Value *CntVal = CGF.EmitScalarConversion(
11086         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11087         CounterVal->getExprLoc());
11088     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11089                           /*Volatile=*/false, Int64Ty);
11090   }
11091   llvm::Value *Args[] = {
11092       emitUpdateLocation(CGF, C->getBeginLoc()),
11093       getThreadID(CGF, C->getBeginLoc()),
11094       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11095   llvm::FunctionCallee RTLFn;
11096   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11097     RTLFn = llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
11098         CGM.getModule(), OMPRTL___kmpc_doacross_post);
11099   } else {
11100     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11101     RTLFn = llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
11102         CGM.getModule(), OMPRTL___kmpc_doacross_wait);
11103   }
11104   CGF.EmitRuntimeCall(RTLFn, Args);
11105 }
11106 
11107 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11108                                llvm::FunctionCallee Callee,
11109                                ArrayRef<llvm::Value *> Args) const {
11110   assert(Loc.isValid() && "Outlined function call location must be valid.");
11111   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11112 
11113   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11114     if (Fn->doesNotThrow()) {
11115       CGF.EmitNounwindRuntimeCall(Fn, Args);
11116       return;
11117     }
11118   }
11119   CGF.EmitRuntimeCall(Callee, Args);
11120 }
11121 
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  // Thin wrapper: route through the common helper, which selects a nounwind
  // call when the callee is known not to throw.
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11127 
11128 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11129   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11130     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11131       HasEmittedDeclareTargetRegion = true;
11132 }
11133 
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  // Base implementation: the native parameter's local address is used
  // directly; TargetParam is intentionally unused here.
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11139 
11140 namespace {
11141 /// Cleanup action for allocate support.
11142 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11143 public:
11144   static const int CleanupArgs = 3;
11145 
11146 private:
11147   llvm::FunctionCallee RTLFn;
11148   llvm::Value *Args[CleanupArgs];
11149 
11150 public:
11151   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11152                        ArrayRef<llvm::Value *> CallArgs)
11153       : RTLFn(RTLFn) {
11154     assert(CallArgs.size() == CleanupArgs &&
11155            "Size of arguments does not match.");
11156     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11157   }
11158   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11159     if (!CGF.HaveInsertPoint())
11160       return;
11161     CGF.EmitRuntimeCall(RTLFn, Args);
11162   }
11163 };
11164 } // namespace
11165 
// Allocates the variable's storage through the OpenMP runtime (__kmpc_alloc)
// when it carries an 'omp allocate' attribute with a non-default allocator,
// and registers a matching __kmpc_free cleanup. Returns Address::invalid()
// when the default (stack) allocation should be used instead.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
       AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
      !AA->getAllocator())
    return Address::invalid();
  // Compute the allocation size in bytes, rounded up to the alignment.
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // VLA: the size is only known at runtime.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  // void *addr = __kmpc_alloc(gtid, size, allocator);
  llvm::Value *Addr =
      CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, getName({CVD->getName(), ".void.addr"}));
  // Schedule __kmpc_free(gtid, addr, allocator) for scope exit on both the
  // normal and the exceptional paths.
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn =
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(CGM.getModule(),
                                                        OMPRTL___kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw allocation to a pointer to the variable's type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getName({CVD->getName(), ".addr"}));
  return Address(Addr, Align);
}
11223 
11224 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11225     CodeGenModule &CGM, const OMPLoopDirective &S)
11226     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11227   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11228   if (!NeedToPush)
11229     return;
11230   NontemporalDeclsSet &DS =
11231       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11232   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11233     for (const Stmt *Ref : C->private_refs()) {
11234       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11235       const ValueDecl *VD;
11236       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11237         VD = DRE->getDecl();
11238       } else {
11239         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11240         assert((ME->isImplicitCXXThis() ||
11241                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11242                "Expected member of current class.");
11243         VD = ME->getMemberDecl();
11244       }
11245       DS.insert(VD);
11246     }
11247   }
11248 }
11249 
11250 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11251   if (!NeedToPush)
11252     return;
11253   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11254 }
11255 
11256 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11257   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11258 
11259   return llvm::any_of(
11260       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11261       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11262 }
11263 
11264 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11265     const OMPExecutableDirective &S,
11266     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11267     const {
11268   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11269   // Vars in target/task regions must be excluded completely.
11270   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11271       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11272     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11273     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11274     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11275     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11276       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11277         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11278     }
11279   }
11280   // Exclude vars in private clauses.
11281   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11282     for (const Expr *Ref : C->varlists()) {
11283       if (!Ref->getType()->isScalarType())
11284         continue;
11285       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11286       if (!DRE)
11287         continue;
11288       NeedToCheckForLPCs.insert(DRE->getDecl());
11289     }
11290   }
11291   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11292     for (const Expr *Ref : C->varlists()) {
11293       if (!Ref->getType()->isScalarType())
11294         continue;
11295       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11296       if (!DRE)
11297         continue;
11298       NeedToCheckForLPCs.insert(DRE->getDecl());
11299     }
11300   }
11301   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11302     for (const Expr *Ref : C->varlists()) {
11303       if (!Ref->getType()->isScalarType())
11304         continue;
11305       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11306       if (!DRE)
11307         continue;
11308       NeedToCheckForLPCs.insert(DRE->getDecl());
11309     }
11310   }
11311   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11312     for (const Expr *Ref : C->varlists()) {
11313       if (!Ref->getType()->isScalarType())
11314         continue;
11315       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11316       if (!DRE)
11317         continue;
11318       NeedToCheckForLPCs.insert(DRE->getDecl());
11319     }
11320   }
11321   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11322     for (const Expr *Ref : C->varlists()) {
11323       if (!Ref->getType()->isScalarType())
11324         continue;
11325       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11326       if (!DRE)
11327         continue;
11328       NeedToCheckForLPCs.insert(DRE->getDecl());
11329     }
11330   }
11331   for (const Decl *VD : NeedToCheckForLPCs) {
11332     for (const LastprivateConditionalData &Data :
11333          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11334       if (Data.DeclToUniqueName.count(VD) > 0) {
11335         if (!Data.Disabled)
11336           NeedToAddForLPCsAsDisabled.insert(VD);
11337         break;
11338       }
11339     }
11340   }
11341 }
11342 
/// Pushes a new lastprivate-conditional region on the stack iff the directive
/// carries at least one 'lastprivate(conditional: ...)' clause and OpenMP >=
/// 5.0 is enabled; otherwise this RAII object is a no-op.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      // Decide in the init-list whether anything must be pushed so the
      // destructor can pop symmetrically based on Action alone.
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Map each conditional lastprivate variable to the unique global name
    // (derived from "pl_cond") that will hold its last value.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  // Remember the loop iteration variable and the owning function; both are
  // consulted when an update of one of these variables is emitted later.
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
11374 
/// Disabling variant: if any variable referenced by the directive is tracked
/// by an enclosing lastprivate-conditional region, push a special "disabled"
/// entry so the analysis is suppressed for those variables inside this region.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // The unique name is irrelevant for disabled entries; only the key set
    // and the Disabled flag matter to lookups.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
11393 
/// Named factory wrapping the disabling constructor; the explicit name makes
/// the intent obvious at call sites.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
11399 
11400 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11401   if (CGM.getLangOpts().OpenMP < 50)
11402     return;
11403   if (Action == ActionToDo::DisableLastprivateConditional) {
11404     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11405            "Expected list of disabled private vars.");
11406     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11407   }
11408   if (Action == ActionToDo::PushAsLastprivateConditional) {
11409     assert(
11410         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11411         "Expected list of lastprivate conditional vars.");
11412     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11413   }
11414 }
11415 
/// Creates (or reuses) the per-function temporary used to track updates of a
/// lastprivate conditional variable from inner regions:
///   struct { <VD's type> Value; char Fired; }
/// The 'Fired' flag is zeroed here; inner regions set it when the variable is
/// assigned. Returns the address of the value field, which callers use as the
/// private copy of \p VD.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use of VD in this function: build the implicit record type and a
    // stack temporary of it, and cache everything for later lookups.
    // NOTE(review): "lasprivate" is a misspelling, but it becomes part of the
    // emitted IR type name; renaming it would churn IR-matching tests, so fix
    // it only together with those tests.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Already registered: unpack the cached tuple.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0; the flag is re-armed every time the private copy is set up.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
11450 
11451 namespace {
11452 /// Checks if the lastprivate conditional variable is referenced in LHS.
11453 class LastprivateConditionalRefChecker final
11454     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11455   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11456   const Expr *FoundE = nullptr;
11457   const Decl *FoundD = nullptr;
11458   StringRef UniqueDeclName;
11459   LValue IVLVal;
11460   llvm::Function *FoundFn = nullptr;
11461   SourceLocation Loc;
11462 
11463 public:
11464   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11465     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11466          llvm::reverse(LPM)) {
11467       auto It = D.DeclToUniqueName.find(E->getDecl());
11468       if (It == D.DeclToUniqueName.end())
11469         continue;
11470       if (D.Disabled)
11471         return false;
11472       FoundE = E;
11473       FoundD = E->getDecl()->getCanonicalDecl();
11474       UniqueDeclName = It->second;
11475       IVLVal = D.IVLVal;
11476       FoundFn = D.Fn;
11477       break;
11478     }
11479     return FoundE == E;
11480   }
11481   bool VisitMemberExpr(const MemberExpr *E) {
11482     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11483       return false;
11484     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11485          llvm::reverse(LPM)) {
11486       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11487       if (It == D.DeclToUniqueName.end())
11488         continue;
11489       if (D.Disabled)
11490         return false;
11491       FoundE = E;
11492       FoundD = E->getMemberDecl()->getCanonicalDecl();
11493       UniqueDeclName = It->second;
11494       IVLVal = D.IVLVal;
11495       FoundFn = D.Fn;
11496       break;
11497     }
11498     return FoundE == E;
11499   }
11500   bool VisitStmt(const Stmt *S) {
11501     for (const Stmt *Child : S->children()) {
11502       if (!Child)
11503         continue;
11504       if (const auto *E = dyn_cast<Expr>(Child))
11505         if (!E->isGLValue())
11506           continue;
11507       if (Visit(Child))
11508         return true;
11509     }
11510     return false;
11511   }
11512   explicit LastprivateConditionalRefChecker(
11513       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11514       : LPM(LPM) {}
11515   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11516   getFoundData() const {
11517     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11518   }
11519 };
11520 } // namespace
11521 
/// Emits the code that keeps the globals backing a lastprivate conditional
/// variable up to date, equivalent to
///   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
/// executed under a critical section (skipped in simd-only mode, where no
/// parallel region can exist).
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    // Pick the signed/unsigned compare matching the IV's type.
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
11608 
11609 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11610                                                          const Expr *LHS) {
11611   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11612     return;
11613   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11614   if (!Checker.Visit(LHS))
11615     return;
11616   const Expr *FoundE;
11617   const Decl *FoundD;
11618   StringRef UniqueDeclName;
11619   LValue IVLVal;
11620   llvm::Function *FoundFn;
11621   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11622       Checker.getFoundData();
11623   if (FoundFn != CGF.CurFn) {
11624     // Special codegen for inner parallel regions.
11625     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11626     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11627     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11628            "Lastprivate conditional is not found in outer region.");
11629     QualType StructTy = std::get<0>(It->getSecond());
11630     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
11631     LValue PrivLVal = CGF.EmitLValue(FoundE);
11632     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11633         PrivLVal.getAddress(CGF),
11634         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
11635     LValue BaseLVal =
11636         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11637     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11638     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11639                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11640                         FiredLVal, llvm::AtomicOrdering::Unordered,
11641                         /*IsVolatile=*/true, /*isInit=*/false);
11642     return;
11643   }
11644 
11645   // Private address of the lastprivate conditional in the current context.
11646   // priv_a
11647   LValue LVal = CGF.EmitLValue(FoundE);
11648   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11649                                    FoundE->getExprLoc());
11650 }
11651 
11652 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11653     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11654     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11655   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11656     return;
11657   auto Range = llvm::reverse(LastprivateConditionalStack);
11658   auto It = llvm::find_if(
11659       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11660   if (It == Range.end() || It->Fn != CGF.CurFn)
11661     return;
11662   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11663   assert(LPCI != LastprivateConditionalToTypes.end() &&
11664          "Lastprivates must be registered already.");
11665   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11666   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11667   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11668   for (const auto &Pair : It->DeclToUniqueName) {
11669     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11670     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
11671       continue;
11672     auto I = LPCI->getSecond().find(Pair.first);
11673     assert(I != LPCI->getSecond().end() &&
11674            "Lastprivate must be rehistered already.");
11675     // bool Cmp = priv_a.Fired != 0;
11676     LValue BaseLVal = std::get<3>(I->getSecond());
11677     LValue FiredLVal =
11678         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11679     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11680     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11681     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11682     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11683     // if (Cmp) {
11684     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11685     CGF.EmitBlock(ThenBB);
11686     Address Addr = CGF.GetAddrOfLocalVar(VD);
11687     LValue LVal;
11688     if (VD->getType()->isReferenceType())
11689       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11690                                            AlignmentSource::Decl);
11691     else
11692       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11693                                 AlignmentSource::Decl);
11694     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11695                                      D.getBeginLoc());
11696     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11697     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11698     // }
11699   }
11700 }
11701 
/// Copies the global "last value" of a lastprivate conditional variable back
/// into its private copy \p PrivLVal at the end of the region.
/// Precondition: \p VD must be registered in the top entry of
/// LastprivateConditionalStack (callers guarantee the stack is non-empty).
/// If the global was never created, the variable was not updated and nothing
/// is emitted.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  // priv_a = last_a;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
11720 
//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime: in SIMD-only mode no OpenMP runtime library calls may
// be generated, so every entry point below that would require the runtime
// traps via llvm_unreachable; reaching one indicates a front-end bug.
//===----------------------------------------------------------------------===//

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11888 
/// In SIMD-only mode only "simple" reductions (those needing no runtime
/// calls) can occur; delegate them to the base implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
11897 
// More CGOpenMPSIMDRuntime entry points that require the OpenMP runtime
// library, which is unavailable in SIMD-only mode; all trap if reached.

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11965 
/// No global requires target-specific emission in SIMD-only mode; always
/// report "not handled" so the normal emission path is taken.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
11969 
// Remaining CGOpenMPSIMDRuntime entry points that require the OpenMP runtime
// library, which is unavailable in SIMD-only mode; all trap if reached.

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12020