1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
/// Base class for handling code generation inside OpenMP regions.
///
/// Extends CGCapturedStmtInfo (always tagged CR_OpenMP) with the region kind,
/// the code generation callback for the region, the directive kind and the
/// HasCancel flag.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Creates region info bound to the captured statement \p CS.
  /// \param CS Captured statement forwarded to CGCapturedStmtInfo.
  /// \param RegionKind Kind of the region, later used by the classof()
  /// implementations of the derived classes.
  /// \param CodeGen Code generation callback stored for this region.
  /// \param Kind Directive kind reported by getDirectiveKind().
  /// \param HasCancel Value reported by hasCancel().
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Creates region info that is not bound to a captured statement; only the
  /// CR_OpenMP tag is passed to the base class.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Hook for emitting a switching point of an untied task. The default
  /// implementation emits nothing.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Returns the kind of this region.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Returns the directive kind this region was created with.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Returns the HasCancel flag this region was created with.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI support: matches every captured statement info whose
  /// kind is CR_OpenMP.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of the region.
  CGOpenMPRegionKind RegionKind;
  /// Code generation callback for the region (stored by value).
  RegionCodeGenTy CodeGen;
  /// Directive kind of the region.
  OpenMPDirectiveKind Kind;
  /// Flag supplied at construction and reported by hasCancel().
  bool HasCancel;
};
109 
110 /// API for captured statement code generation in OpenMP constructs.
111 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
112 public:
113   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
114                              const RegionCodeGenTy &CodeGen,
115                              OpenMPDirectiveKind Kind, bool HasCancel,
116                              StringRef HelperName)
117       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
118                            HasCancel),
119         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
120     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
121   }
122 
123   /// Get a variable or parameter for storing global thread id
124   /// inside OpenMP construct.
125   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
126 
127   /// Get the name of the capture helper.
128   StringRef getHelperName() const override { return HelperName; }
129 
130   static bool classof(const CGCapturedStmtInfo *Info) {
131     return CGOpenMPRegionInfo::classof(Info) &&
132            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
133                ParallelOutlinedRegion;
134   }
135 
136 private:
137   /// A variable or parameter storing global thread id for OpenMP
138   /// constructs.
139   const VarDecl *ThreadIDVar;
140   StringRef HelperName;
141 };
142 
/// API for captured statement code generation in OpenMP constructs.
///
/// Region info for regions of kind TaskOutlinedRegion. Besides the thread id
/// variable it keeps a reference to the UntiedTaskActionTy that emits the
/// switching points of untied tasks.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the part-id switch of an untied task.
  ///
  /// For a tied task (\c Untied is false) neither Enter() nor
  /// emitUntiedSwitch() emits anything.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True if the task is untied (negation of the constructor's \c Tied).
    bool Untied;
    /// Pointer-typed variable through which the part id is loaded and stored.
    const VarDecl *PartIDVar;
    /// Code generation callback invoked at every switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the loaded part id; created in Enter() for untied tasks
    /// only, null otherwise.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    /// \param Tied True if the task is tied.
    /// \param PartIDVar Variable holding the pointer to the part id.
    /// \param UntiedCodeGen Callback run at each switching point.
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    /// For untied tasks: loads the part id, creates the switch on it, wires
    /// the default destination to a branch to the return block, registers
    /// case 0 and then emits the first switching point.
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Default destination of the switch: leave through the return block.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Case 0 jumps to a fresh block where emission continues.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// For untied tasks: emits one switching point. Stores the current number
    /// of switch cases into the part id, runs \c UntiedCodeGen, branches to
    /// the return block and registers a new switch case whose block branches
    /// to the point right after the switching point.
    /// NOTE(review): dereferences UntiedSwitch, so for untied tasks Enter()
    /// presumably has to run first - confirm against callers.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The stored value equals the index of the case added below.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        // Destination at which emission continues after this switching point.
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Returns the number of cases currently registered in the switch.
    /// NOTE(review): dereferences UntiedSwitch unconditionally; it is only
    /// set by Enter() for untied tasks - confirm callers respect that.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  /// \param CS Captured statement the region is built from.
  /// \param ThreadIDVar Variable or parameter storing the global thread id;
  /// must not be null.
  /// \param CodeGen Code generation callback for the region.
  /// \param Kind Directive kind of the region.
  /// \param HasCancel Value reported by hasCancel().
  /// \param Action Untied-task action; stored by reference, so it has to
  /// outlive this object.
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Emits a switching point by delegating to the stored untied-task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// LLVM-style RTTI support: matches OpenMP region infos whose region kind is
  /// TaskOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
231 
232 /// API for inlined captured statement code generation in OpenMP
233 /// constructs.
234 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
235 public:
236   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
237                             const RegionCodeGenTy &CodeGen,
238                             OpenMPDirectiveKind Kind, bool HasCancel)
239       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
240         OldCSI(OldCSI),
241         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
242 
243   // Retrieve the value of the context parameter.
244   llvm::Value *getContextValue() const override {
245     if (OuterRegionInfo)
246       return OuterRegionInfo->getContextValue();
247     llvm_unreachable("No context value for inlined OpenMP region");
248   }
249 
250   void setContextValue(llvm::Value *V) override {
251     if (OuterRegionInfo) {
252       OuterRegionInfo->setContextValue(V);
253       return;
254     }
255     llvm_unreachable("No context value for inlined OpenMP region");
256   }
257 
258   /// Lookup the captured field decl for a variable.
259   const FieldDecl *lookup(const VarDecl *VD) const override {
260     if (OuterRegionInfo)
261       return OuterRegionInfo->lookup(VD);
262     // If there is no outer outlined region,no need to lookup in a list of
263     // captured variables, we can use the original one.
264     return nullptr;
265   }
266 
267   FieldDecl *getThisFieldDecl() const override {
268     if (OuterRegionInfo)
269       return OuterRegionInfo->getThisFieldDecl();
270     return nullptr;
271   }
272 
273   /// Get a variable or parameter for storing global thread id
274   /// inside OpenMP construct.
275   const VarDecl *getThreadIDVariable() const override {
276     if (OuterRegionInfo)
277       return OuterRegionInfo->getThreadIDVariable();
278     return nullptr;
279   }
280 
281   /// Get an LValue for the current ThreadID variable.
282   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
285     llvm_unreachable("No LValue for inlined OpenMP construct");
286   }
287 
288   /// Get the name of the capture helper.
289   StringRef getHelperName() const override {
290     if (auto *OuterRegionInfo = getOldCSI())
291       return OuterRegionInfo->getHelperName();
292     llvm_unreachable("No helper name for inlined OpenMP construct");
293   }
294 
295   void emitUntiedSwitch(CodeGenFunction &CGF) override {
296     if (OuterRegionInfo)
297       OuterRegionInfo->emitUntiedSwitch(CGF);
298   }
299 
300   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
301 
302   static bool classof(const CGCapturedStmtInfo *Info) {
303     return CGOpenMPRegionInfo::classof(Info) &&
304            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
305   }
306 
307   ~CGOpenMPInlinedRegionInfo() override = default;
308 
309 private:
310   /// CodeGen info about outer OpenMP region.
311   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
312   CGOpenMPRegionInfo *OuterRegionInfo;
313 };
314 
315 /// API for captured statement code generation in OpenMP target
316 /// constructs. For this captures, implicit parameters are used instead of the
317 /// captured fields. The name of the target region has to be unique in a given
318 /// application so it is provided by the client, because only the client has
319 /// the information to generate that.
320 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
321 public:
322   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
323                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
324       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
325                            /*HasCancel=*/false),
326         HelperName(HelperName) {}
327 
328   /// This is unused for target regions because each starts executing
329   /// with a single thread.
330   const VarDecl *getThreadIDVariable() const override { return nullptr; }
331 
332   /// Get the name of the capture helper.
333   StringRef getHelperName() const override { return HelperName; }
334 
335   static bool classof(const CGCapturedStmtInfo *Info) {
336     return CGOpenMPRegionInfo::classof(Info) &&
337            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
338   }
339 
340 private:
341   StringRef HelperName;
342 };
343 
/// Placeholder code generation callback that must never be invoked; reaching
/// it is a programming error.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
347 /// API for generation of expressions captured in a innermost OpenMP
348 /// region.
349 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
350 public:
351   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
352       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
353                                   OMPD_unknown,
354                                   /*HasCancel=*/false),
355         PrivScope(CGF) {
356     // Make sure the globals captured in the provided statement are local by
357     // using the privatization logic. We assume the same variable is not
358     // captured more than once.
359     for (const auto &C : CS.captures()) {
360       if (!C.capturesVariable() && !C.capturesVariableByCopy())
361         continue;
362 
363       const VarDecl *VD = C.getCapturedVar();
364       if (VD->isLocalVarDeclOrParm())
365         continue;
366 
367       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
368                       /*RefersToEnclosingVariableOrCapture=*/false,
369                       VD->getType().getNonReferenceType(), VK_LValue,
370                       C.getLocation());
371       PrivScope.addPrivate(
372           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
373     }
374     (void)PrivScope.Privatize();
375   }
376 
377   /// Lookup the captured field decl for a variable.
378   const FieldDecl *lookup(const VarDecl *VD) const override {
379     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
380       return FD;
381     return nullptr;
382   }
383 
384   /// Emit the captured statement body.
385   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
386     llvm_unreachable("No body for expressions");
387   }
388 
389   /// Get a variable or parameter for storing global thread id
390   /// inside OpenMP construct.
391   const VarDecl *getThreadIDVariable() const override {
392     llvm_unreachable("No thread id for expressions");
393   }
394 
395   /// Get the name of the capture helper.
396   StringRef getHelperName() const override {
397     llvm_unreachable("No helper name for expressions");
398   }
399 
400   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
401 
402 private:
403   /// Private scope to capture global variables.
404   CodeGenFunction::OMPPrivateScope PrivScope;
405 };
406 
407 /// RAII for emitting code of OpenMP constructs.
408 class InlinedOpenMPRegionRAII {
409   CodeGenFunction &CGF;
410   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
411   FieldDecl *LambdaThisCaptureField = nullptr;
412   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
413 
414 public:
415   /// Constructs region for combined constructs.
416   /// \param CodeGen Code generation sequence for combined directives. Includes
417   /// a list of functions used for code generation of implicitly inlined
418   /// regions.
419   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
420                           OpenMPDirectiveKind Kind, bool HasCancel)
421       : CGF(CGF) {
422     // Start emission for the construct.
423     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
424         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
425     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
426     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
427     CGF.LambdaThisCaptureField = nullptr;
428     BlockInfo = CGF.BlockInfo;
429     CGF.BlockInfo = nullptr;
430   }
431 
432   ~InlinedOpenMPRegionRAII() {
433     // Restore original CapturedStmtInfo only if we're done with code emission.
434     auto *OldCSI =
435         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
436     delete CGF.CapturedStmtInfo;
437     CGF.CapturedStmtInfo = OldCSI;
438     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
439     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
440     CGF.BlockInfo = BlockInfo;
441   }
442 };
443 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  // Marks this enum as a bitmask enum whose largest value is
  // OMP_IDENT_WORK_DISTRIBUTE.
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
472 
namespace {
// Enables the bitmask-enum operators for the enums marked with
// LLVM_MARK_AS_BITMASK_ENUM.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// Flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// No requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  // Marks this enum as a bitmask enum whose largest value is
  // OMP_REQ_DYNAMIC_ALLOCATORS.
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device ID values used for offloading.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
498 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
///
/// The enumerators below are listed in the same order as the fields of the
/// structure above.
enum IdentFieldIndex {
  /// Might be used in Fortran.
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more.
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
539 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
///
/// The two modifier enumerators at the end are single high bits (bit 29 and
/// bit 30) rather than consecutive schedule numbers.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule; an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
571 
572 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
573 /// region.
574 class CleanupTy final : public EHScopeStack::Cleanup {
575   PrePostActionTy *Action;
576 
577 public:
578   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
579   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
580     if (!CGF.HaveInsertPoint())
581       return;
582     Action->Exit(CGF);
583   }
584 };
585 
586 } // anonymous namespace
587 
588 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
589   CodeGenFunction::RunCleanupsScope Scope(CGF);
590   if (PrePostAction) {
591     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
592     Callback(CodeGen, CGF, *PrePostAction);
593   } else {
594     PrePostActionTy Action;
595     Callback(CodeGen, CGF, Action);
596   }
597 }
598 
599 /// Check if the combiner is a call to UDR combiner and if it is so return the
600 /// UDR decl used for reduction.
601 static const OMPDeclareReductionDecl *
602 getReductionInit(const Expr *ReductionOp) {
603   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
604     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
605       if (const auto *DRE =
606               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
607         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
608           return DRD;
609   return nullptr;
610 }
611 
612 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
613                                              const OMPDeclareReductionDecl *DRD,
614                                              const Expr *InitOp,
615                                              Address Private, Address Original,
616                                              QualType Ty) {
617   if (DRD->getInitializer()) {
618     std::pair<llvm::Function *, llvm::Function *> Reduction =
619         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
620     const auto *CE = cast<CallExpr>(InitOp);
621     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
622     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
623     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
624     const auto *LHSDRE =
625         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
626     const auto *RHSDRE =
627         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
628     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
629     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
630                             [=]() { return Private; });
631     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
632                             [=]() { return Original; });
633     (void)PrivateScope.Privatize();
634     RValue Func = RValue::get(Reduction.second);
635     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
636     CGF.EmitIgnoredExpr(InitOp);
637   } else {
638     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
639     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
640     auto *GV = new llvm::GlobalVariable(
641         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
642         llvm::GlobalValue::PrivateLinkage, Init, Name);
643     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
644     RValue InitRVal;
645     switch (CGF.getEvaluationKind(Ty)) {
646     case TEK_Scalar:
647       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
648       break;
649     case TEK_Complex:
650       InitRVal =
651           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
652       break;
653     case TEK_Aggregate:
654       InitRVal = RValue::getAggregate(LV.getAddress(CGF));
655       break;
656     }
657     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
658     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
659     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
660                          /*IsInitializer=*/false);
661   }
662 }
663 
/// Emit initialization of arrays of complex types.
///
/// Emits a loop that walks the destination array (and, when \p DRD is set,
/// the source array in lock-step) one base element at a time and initializes
/// every destination element.
/// \param CGF Function in which the code is emitted.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// emitInitWithReductionInitializer(); otherwise \p Init is emitted directly
/// into the element.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration; when null the source array is
/// not touched at all.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  // The source is given the same element type as the destination.
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Pointer one past the last destination element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for an empty array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element across
  // iterations; the first incoming value is the array start.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Cleanups created while initializing one element are run before moving
    // on to the next one.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the source GEP is named "omp.arraycpy.dest.element", the
    // same as the destination GEP below - looks like a copy-paste leftover;
    // confirm before renaming since the name shows up in emitted IR.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
752 
753 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
754   return CGF.EmitOMPSharedLValue(E);
755 }
756 
757 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
758                                             const Expr *E) {
759   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
760     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
761   return LValue();
762 }
763 
764 void ReductionCodeGen::emitAggregateInitialization(
765     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
766     const OMPDeclareReductionDecl *DRD) {
767   // Emit VarDecl with copy init for arrays.
768   // Get the address of the original variable captured in current
769   // captured region.
770   const auto *PrivateVD =
771       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
772   bool EmitDeclareReductionInit =
773       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
774   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
775                        EmitDeclareReductionInit,
776                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
777                                                 : PrivateVD->getInit(),
778                        DRD, SharedLVal.getAddress(CGF));
779 }
780 
781 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
782                                    ArrayRef<const Expr *> Origs,
783                                    ArrayRef<const Expr *> Privates,
784                                    ArrayRef<const Expr *> ReductionOps) {
785   ClausesData.reserve(Shareds.size());
786   SharedAddresses.reserve(Shareds.size());
787   Sizes.reserve(Shareds.size());
788   BaseDecls.reserve(Shareds.size());
789   const auto *IOrig = Origs.begin();
790   const auto *IPriv = Privates.begin();
791   const auto *IRed = ReductionOps.begin();
792   for (const Expr *Ref : Shareds) {
793     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
794     std::advance(IOrig, 1);
795     std::advance(IPriv, 1);
796     std::advance(IRed, 1);
797   }
798 }
799 
800 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
801   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
802          "Number of generated lvalues must be exactly N.");
803   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
804   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
805   SharedAddresses.emplace_back(First, Second);
806   if (ClausesData[N].Shared == ClausesData[N].Ref) {
807     OrigAddresses.emplace_back(First, Second);
808   } else {
809     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
810     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
811     OrigAddresses.emplace_back(First, Second);
812   }
813 }
814 
815 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
816   const auto *PrivateVD =
817       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
818   QualType PrivateType = PrivateVD->getType();
819   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
820   if (!PrivateType->isVariablyModifiedType()) {
821     Sizes.emplace_back(
822         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
823         nullptr);
824     return;
825   }
826   llvm::Value *Size;
827   llvm::Value *SizeInChars;
828   auto *ElemType =
829       cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
830           ->getElementType();
831   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
832   if (AsArraySection) {
833     Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
834                                      OrigAddresses[N].first.getPointer(CGF));
835     Size = CGF.Builder.CreateNUWAdd(
836         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
837     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
838   } else {
839     SizeInChars =
840         CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
841     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
842   }
843   Sizes.emplace_back(SizeInChars, Size);
844   CodeGenFunction::OpaqueValueMapping OpaqueMap(
845       CGF,
846       cast<OpaqueValueExpr>(
847           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
848       RValue::get(Size));
849   CGF.EmitVariablyModifiedType(PrivateType);
850 }
851 
852 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
853                                          llvm::Value *Size) {
854   const auto *PrivateVD =
855       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
856   QualType PrivateType = PrivateVD->getType();
857   if (!PrivateType->isVariablyModifiedType()) {
858     assert(!Size && !Sizes[N].second &&
859            "Size should be nullptr for non-variably modified reduction "
860            "items.");
861     return;
862   }
863   CodeGenFunction::OpaqueValueMapping OpaqueMap(
864       CGF,
865       cast<OpaqueValueExpr>(
866           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
867       RValue::get(Size));
868   CGF.EmitVariablyModifiedType(PrivateType);
869 }
870 
871 void ReductionCodeGen::emitInitialization(
872     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
873     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
874   assert(SharedAddresses.size() > N && "No variable was generated");
875   const auto *PrivateVD =
876       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
877   const OMPDeclareReductionDecl *DRD =
878       getReductionInit(ClausesData[N].ReductionOp);
879   QualType PrivateType = PrivateVD->getType();
880   PrivateAddr = CGF.Builder.CreateElementBitCast(
881       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
882   QualType SharedType = SharedAddresses[N].first.getType();
883   SharedLVal = CGF.MakeAddrLValue(
884       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
885                                        CGF.ConvertTypeForMem(SharedType)),
886       SharedType, SharedAddresses[N].first.getBaseInfo(),
887       CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
888   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
889     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
890   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
891     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
892                                      PrivateAddr, SharedLVal.getAddress(CGF),
893                                      SharedLVal.getType());
894   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
895              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
896     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
897                          PrivateVD->getType().getQualifiers(),
898                          /*IsInitializer=*/false);
899   }
900 }
901 
902 bool ReductionCodeGen::needCleanups(unsigned N) {
903   const auto *PrivateVD =
904       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
905   QualType PrivateType = PrivateVD->getType();
906   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
907   return DTorKind != QualType::DK_none;
908 }
909 
910 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
911                                     Address PrivateAddr) {
912   const auto *PrivateVD =
913       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
914   QualType PrivateType = PrivateVD->getType();
915   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
916   if (needCleanups(N)) {
917     PrivateAddr = CGF.Builder.CreateElementBitCast(
918         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
919     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
920   }
921 }
922 
923 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
924                           LValue BaseLV) {
925   BaseTy = BaseTy.getNonReferenceType();
926   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
927          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
928     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
929       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
930     } else {
931       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
932       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
933     }
934     BaseTy = BaseTy->getPointeeType();
935   }
936   return CGF.MakeAddrLValue(
937       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
938                                        CGF.ConvertTypeForMem(ElTy)),
939       BaseLV.getType(), BaseLV.getBaseInfo(),
940       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
941 }
942 
943 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
944                           llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
945                           llvm::Value *Addr) {
946   Address Tmp = Address::invalid();
947   Address TopTmp = Address::invalid();
948   Address MostTopTmp = Address::invalid();
949   BaseTy = BaseTy.getNonReferenceType();
950   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
951          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
952     Tmp = CGF.CreateMemTemp(BaseTy);
953     if (TopTmp.isValid())
954       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
955     else
956       MostTopTmp = Tmp;
957     TopTmp = Tmp;
958     BaseTy = BaseTy->getPointeeType();
959   }
960   llvm::Type *Ty = BaseLVType;
961   if (Tmp.isValid())
962     Ty = Tmp.getElementType();
963   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
964   if (Tmp.isValid()) {
965     CGF.Builder.CreateStore(Addr, Tmp);
966     return MostTopTmp;
967   }
968   return Address(Addr, BaseLVAlignment);
969 }
970 
971 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
972   const VarDecl *OrigVD = nullptr;
973   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
974     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
975     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
976       Base = TempOASE->getBase()->IgnoreParenImpCasts();
977     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
978       Base = TempASE->getBase()->IgnoreParenImpCasts();
979     DE = cast<DeclRefExpr>(Base);
980     OrigVD = cast<VarDecl>(DE->getDecl());
981   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
982     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
983     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
984       Base = TempASE->getBase()->IgnoreParenImpCasts();
985     DE = cast<DeclRefExpr>(Base);
986     OrigVD = cast<VarDecl>(DE->getDecl());
987   }
988   return OrigVD;
989 }
990 
991 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
992                                                Address PrivateAddr) {
993   const DeclRefExpr *DE;
994   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
995     BaseDecls.emplace_back(OrigVD);
996     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
997     LValue BaseLValue =
998         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
999                     OriginalBaseLValue);
1000     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1001         BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1002     llvm::Value *PrivatePointer =
1003         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1004             PrivateAddr.getPointer(),
1005             SharedAddresses[N].first.getAddress(CGF).getType());
1006     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1007     return castToBase(CGF, OrigVD->getType(),
1008                       SharedAddresses[N].first.getType(),
1009                       OriginalBaseLValue.getAddress(CGF).getType(),
1010                       OriginalBaseLValue.getAlignment(), Ptr);
1011   }
1012   BaseDecls.emplace_back(
1013       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1014   return PrivateAddr;
1015 }
1016 
1017 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1018   const OMPDeclareReductionDecl *DRD =
1019       getReductionInit(ClausesData[N].ReductionOp);
1020   return DRD && DRD->getInitializer();
1021 }
1022 
1023 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1024   return CGF.EmitLoadOfPointerLValue(
1025       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1026       getThreadIDVariable()->getType()->castAs<PointerType>());
1027 }
1028 
1029 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1030   if (!CGF.HaveInsertPoint())
1031     return;
1032   // 1.2.2 OpenMP Language Terminology
1033   // Structured block - An executable statement with a single entry at the
1034   // top and a single exit at the bottom.
1035   // The point of exit cannot be a branch out of the structured block.
1036   // longjmp() and throw() must not violate the entry/exit criteria.
1037   CGF.EHStack.pushTerminate();
1038   CodeGen(CGF);
1039   CGF.EHStack.popTerminate();
1040 }
1041 
1042 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1043     CodeGenFunction &CGF) {
1044   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1045                             getThreadIDVariable()->getType(),
1046                             AlignmentSource::Decl);
1047 }
1048 
1049 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1050                                        QualType FieldTy) {
1051   auto *Field = FieldDecl::Create(
1052       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1053       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1054       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1055   Field->setAccess(AS_public);
1056   DC->addDecl(Field);
1057   return Field;
1058 }
1059 
1060 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1061                                  StringRef Separator)
1062     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1063       OffloadEntriesInfoManager(CGM) {
1064   ASTContext &C = CGM.getContext();
1065   RecordDecl *RD = C.buildImplicitRecord("ident_t");
1066   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1067   RD->startDefinition();
1068   // reserved_1
1069   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1070   // flags
1071   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1072   // reserved_2
1073   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1074   // reserved_3
1075   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1076   // psource
1077   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1078   RD->completeDefinition();
1079   IdentQTy = C.getRecordType(RD);
1080   IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1081   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1082 
1083   // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1084   llvm::omp::types::initializeTypes(CGM.getModule());
1085   loadOffloadInfoMetadata();
1086 }
1087 
1088 void CGOpenMPRuntime::clear() {
1089   InternalVars.clear();
1090   // Clean non-target variable declarations possibly used only in debug info.
1091   for (const auto &Data : EmittedNonTargetVariables) {
1092     if (!Data.getValue().pointsToAliveValue())
1093       continue;
1094     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1095     if (!GV)
1096       continue;
1097     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1098       continue;
1099     GV->eraseFromParent();
1100   }
1101 }
1102 
1103 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1104   SmallString<128> Buffer;
1105   llvm::raw_svector_ostream OS(Buffer);
1106   StringRef Sep = FirstSeparator;
1107   for (StringRef Part : Parts) {
1108     OS << Sep << Part;
1109     Sep = Separator;
1110   }
1111   return std::string(OS.str());
1112 }
1113 
/// Emits an internal helper function for a user-defined reduction: either the
/// combiner or the initializer.
///
/// The generated function returns void and takes two restrict-qualified
/// pointers to \p Ty, in the order (out, in). Inside it, \p Out and \p In are
/// privatized so that they refer to the pointees of those two parameters.
///
/// \param CombinerInitializer Expression to evaluate in the function body;
/// may be null, in which case no expression is emitted.
/// \param In Variable mapped to the pointee of the second parameter.
/// \param Out Variable mapped to the pointee of the first parameter.
/// \param IsCombiner Selects the function name; when false, a non-trivial
/// initializer of \p Out is emitted into its storage before
/// \p CombinerInitializer.
/// \returns The newly created function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *restrict out, Ty *restrict in);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  // Parameter order is (out, in).
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, replace the noinline/optnone attributes with
  // alwaysinline.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers, first emit the non-trivial initializer attached to Out
  // (if any) into the privatized storage of Out.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  // The expression is evaluated only for its side effects.
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1170 
1171 void CGOpenMPRuntime::emitUserDefinedReduction(
1172     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1173   if (UDRMap.count(D) > 0)
1174     return;
1175   llvm::Function *Combiner = emitCombinerOrInitializer(
1176       CGM, D->getType(), D->getCombiner(),
1177       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1178       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1179       /*IsCombiner=*/true);
1180   llvm::Function *Initializer = nullptr;
1181   if (const Expr *Init = D->getInitializer()) {
1182     Initializer = emitCombinerOrInitializer(
1183         CGM, D->getType(),
1184         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1185                                                                      : nullptr,
1186         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1187         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1188         /*IsCombiner=*/false);
1189   }
1190   UDRMap.try_emplace(D, Combiner, Initializer);
1191   if (CGF) {
1192     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1193     Decls.second.push_back(D);
1194   }
1195 }
1196 
1197 std::pair<llvm::Function *, llvm::Function *>
1198 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1199   auto I = UDRMap.find(D);
1200   if (I != UDRMap.end())
1201     return I->second;
1202   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1203   return UDRMap.lookup(D);
1204 }
1205 
1206 namespace {
1207 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1208 // Builder if one is present.
1209 struct PushAndPopStackRAII {
1210   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1211                       bool HasCancel)
1212       : OMPBuilder(OMPBuilder) {
1213     if (!OMPBuilder)
1214       return;
1215 
1216     // The following callback is the crucial part of clangs cleanup process.
1217     //
1218     // NOTE:
1219     // Once the OpenMPIRBuilder is used to create parallel regions (and
1220     // similar), the cancellation destination (Dest below) is determined via
1221     // IP. That means if we have variables to finalize we split the block at IP,
1222     // use the new block (=BB) as destination to build a JumpDest (via
1223     // getJumpDestInCurrentScope(BB)) which then is fed to
1224     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1225     // to push & pop an FinalizationInfo object.
1226     // The FiniCB will still be needed but at the point where the
1227     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1228     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1229       assert(IP.getBlock()->end() == IP.getPoint() &&
1230              "Clang CG should cause non-terminated block!");
1231       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1232       CGF.Builder.restoreIP(IP);
1233       CodeGenFunction::JumpDest Dest =
1234           CGF.getOMPCancelDestination(OMPD_parallel);
1235       CGF.EmitBranchThroughCleanup(Dest);
1236     };
1237 
1238     // TODO: Remove this once we emit parallel regions through the
1239     //       OpenMPIRBuilder as it can do this setup internally.
1240     llvm::OpenMPIRBuilder::FinalizationInfo FI(
1241         {FiniCB, OMPD_parallel, HasCancel});
1242     OMPBuilder->pushFinalizationCB(std::move(FI));
1243   }
1244   ~PushAndPopStackRAII() {
1245     if (OMPBuilder)
1246       OMPBuilder->popFinalizationCB();
1247   }
1248   llvm::OpenMPIRBuilder *OMPBuilder;
1249 };
1250 } // namespace
1251 
1252 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1253     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1254     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1255     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1256   assert(ThreadIDVar->getType()->isPointerType() &&
1257          "thread id variable must be of type kmp_int32 *");
1258   CodeGenFunction CGF(CGM, true);
1259   bool HasCancel = false;
1260   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1261     HasCancel = OPD->hasCancel();
1262   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1263     HasCancel = OPD->hasCancel();
1264   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1265     HasCancel = OPSD->hasCancel();
1266   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1267     HasCancel = OPFD->hasCancel();
1268   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1269     HasCancel = OPFD->hasCancel();
1270   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1271     HasCancel = OPFD->hasCancel();
1272   else if (const auto *OPFD =
1273                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1274     HasCancel = OPFD->hasCancel();
1275   else if (const auto *OPFD =
1276                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1277     HasCancel = OPFD->hasCancel();
1278 
1279   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1280   //       parallel region to make cancellation barriers work properly.
1281   llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
1282   PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
1283   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1284                                     HasCancel, OutlinedHelperName);
1285   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1286   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1287 }
1288 
1289 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1290     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1291     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1292   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1293   return emitParallelOrTeamsOutlinedFunction(
1294       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1295 }
1296 
1297 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1298     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1299     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1300   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1301   return emitParallelOrTeamsOutlinedFunction(
1302       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1303 }
1304 
/// Emits the outlined function for the captured statement of a task or
/// taskloop directive \p D.
///
/// \param ThreadIDVar Thread id variable; must not have pointer type.
/// \param PartIDVar Variable handed to the untied-task action.
/// \param TaskTVar Variable holding a pointer to the task; it is loaded and
/// passed to the runtime call emitted by the untied-task callback.
/// \param InnermostKind Directive kind forwarded to the region info.
/// \param CodeGen Code generation callback for the body; the untied-task
/// action is installed on it.
/// \param Tied Whether the task is tied.
/// \param NumberOfParts [out] Written only for untied tasks, with the number
/// of parts reported by the action.
/// \returns The generated function.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Callback given to the untied-task action: emits a call to
  // __kmpc_omp_task with the update location, the thread id and the task
  // pointer loaded from TaskTVar.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop-based directives capture under OMPD_taskloop, everything else
  // under OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Query the concrete directive for a cancel construct; directive kinds not
  // listed here leave HasCancel false.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only reported for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1351 
1352 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1353                              const RecordDecl *RD, const CGRecordLayout &RL,
1354                              ArrayRef<llvm::Constant *> Data) {
1355   llvm::StructType *StructTy = RL.getLLVMType();
1356   unsigned PrevIdx = 0;
1357   ConstantInitBuilder CIBuilder(CGM);
1358   auto DI = Data.begin();
1359   for (const FieldDecl *FD : RD->fields()) {
1360     unsigned Idx = RL.getLLVMFieldNo(FD);
1361     // Fill the alignment.
1362     for (unsigned I = PrevIdx; I < Idx; ++I)
1363       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1364     PrevIdx = Idx + 1;
1365     Fields.add(*DI);
1366     ++DI;
1367   }
1368 }
1369 
1370 template <class... As>
1371 static llvm::GlobalVariable *
1372 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1373                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1374                    As &&... Args) {
1375   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1376   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1377   ConstantInitBuilder CIBuilder(CGM);
1378   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1379   buildStructValue(Fields, CGM, RD, RL, Data);
1380   return Fields.finishAndCreateGlobal(
1381       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1382       std::forward<As>(Args)...);
1383 }
1384 
1385 template <typename T>
1386 static void
1387 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1388                                          ArrayRef<llvm::Constant *> Data,
1389                                          T &Parent) {
1390   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1391   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1392   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1393   buildStructValue(Fields, CGM, RD, RL, Data);
1394   Fields.finishAndAddTo(Parent);
1395 }
1396 
1397 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1398   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1399   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1400   FlagsTy FlagsKey(Flags, Reserved2Flags);
1401   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1402   if (!Entry) {
1403     if (!DefaultOpenMPPSource) {
1404       // Initialize default location for psource field of ident_t structure of
1405       // all ident_t objects. Format is ";file;function;line;column;;".
1406       // Taken from
1407       // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1408       DefaultOpenMPPSource =
1409           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1410       DefaultOpenMPPSource =
1411           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1412     }
1413 
1414     llvm::Constant *Data[] = {
1415         llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1416         llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1417         llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1418         llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1419     llvm::GlobalValue *DefaultOpenMPLocation =
1420         createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1421                            llvm::GlobalValue::PrivateLinkage);
1422     DefaultOpenMPLocation->setUnnamedAddr(
1423         llvm::GlobalValue::UnnamedAddr::Global);
1424 
1425     OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1426   }
1427   return Address(Entry, Align);
1428 }
1429 
1430 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1431                                              bool AtCurrentPoint) {
1432   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1433   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1434 
1435   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1436   if (AtCurrentPoint) {
1437     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1438         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1439   } else {
1440     Elem.second.ServiceInsertPt =
1441         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1442     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1443   }
1444 }
1445 
1446 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1447   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1448   if (Elem.second.ServiceInsertPt) {
1449     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1450     Elem.second.ServiceInsertPt = nullptr;
1451     Ptr->eraseFromParent();
1452   }
1453 }
1454 
/// Returns a pointer to an ident_t object describing \p Loc for use as the
/// location argument of a runtime call.
///
/// \p Flags is always OR-ed with OMP_IDENT_KMPC. If debug info is disabled or
/// \p Loc is invalid, the shared default location global for these flags is
/// returned. Otherwise a per-function ".kmpc_loc.addr" temporary is used: on
/// first use in a function it is initialized by a memcpy from the default
/// location, emitted at the function's service insert point, and on every call
/// its psource field is overwritten at the current insert point with a
/// ";file;function;line;column;;" string built from \p Loc.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  // Reuse the location temporary already created for this function, if any.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the temporary at the service insert point; the guard restores
    // the builder's insert point when this scope ends.
    // NOTE(review): the memcpy uses the Flags of the first call in this
    // function; later calls with different Flags reuse the temporary without
    // re-initializing it - confirm this is intended.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // The psource strings are cached per raw source location encoding.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    // The function field stays empty when the current declaration is not a
    // FunctionDecl (or there is none).
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1515 
/// Returns the OpenMP global thread id value to use at the current point of
/// the function being emitted.
///
/// The value comes from one of three places, tried in order: a value already
/// cached for this function in OpenMPLocThreadIDMap; a load of the thread id
/// variable of the enclosing OpenMP region (when the conditions below allow
/// it); or a call to __kmpc_global_thread_num emitted at the function's
/// service insert point, which is then cached for the whole function.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the thread id variable when any of these holds: no landing
      // pad is required, C++ exceptions are off, we are emitting into the
      // entry block, or the variable's address is not an instruction or is
      // defined in the entry block or in the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insert point; the guard restores the
  // builder's previous insert point on return.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1573 
1574 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1575   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1576   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1577     clearLocThreadIdInsertPt(CGF);
1578     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1579   }
1580   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1581     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1582       UDRMap.erase(D);
1583     FunctionUDRMap.erase(CGF.CurFn);
1584   }
1585   auto I = FunctionUDMMap.find(CGF.CurFn);
1586   if (I != FunctionUDMMap.end()) {
1587     for(const auto *D : I->second)
1588       UDMMap.erase(D);
1589     FunctionUDMMap.erase(I);
1590   }
1591   LastprivateConditionalToTypes.erase(CGF.CurFn);
1592 }
1593 
1594 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1595   return IdentTy->getPointerTo();
1596 }
1597 
1598 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1599   if (!Kmpc_MicroTy) {
1600     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1601     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1602                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1603     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1604   }
1605   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1606 }
1607 
1608 llvm::FunctionCallee
1609 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1610   assert((IVSize == 32 || IVSize == 64) &&
1611          "IV size is not compatible with the omp runtime");
1612   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1613                                             : "__kmpc_for_static_init_4u")
1614                                 : (IVSigned ? "__kmpc_for_static_init_8"
1615                                             : "__kmpc_for_static_init_8u");
1616   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1617   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1618   llvm::Type *TypeParams[] = {
1619     getIdentTyPointerTy(),                     // loc
1620     CGM.Int32Ty,                               // tid
1621     CGM.Int32Ty,                               // schedtype
1622     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1623     PtrTy,                                     // p_lower
1624     PtrTy,                                     // p_upper
1625     PtrTy,                                     // p_stride
1626     ITy,                                       // incr
1627     ITy                                        // chunk
1628   };
1629   auto *FnTy =
1630       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1631   return CGM.CreateRuntimeFunction(FnTy, Name);
1632 }
1633 
1634 llvm::FunctionCallee
1635 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1636   assert((IVSize == 32 || IVSize == 64) &&
1637          "IV size is not compatible with the omp runtime");
1638   StringRef Name =
1639       IVSize == 32
1640           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1641           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1642   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1643   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1644                                CGM.Int32Ty,           // tid
1645                                CGM.Int32Ty,           // schedtype
1646                                ITy,                   // lower
1647                                ITy,                   // upper
1648                                ITy,                   // stride
1649                                ITy                    // chunk
1650   };
1651   auto *FnTy =
1652       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1653   return CGM.CreateRuntimeFunction(FnTy, Name);
1654 }
1655 
1656 llvm::FunctionCallee
1657 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1658   assert((IVSize == 32 || IVSize == 64) &&
1659          "IV size is not compatible with the omp runtime");
1660   StringRef Name =
1661       IVSize == 32
1662           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1663           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1664   llvm::Type *TypeParams[] = {
1665       getIdentTyPointerTy(), // loc
1666       CGM.Int32Ty,           // tid
1667   };
1668   auto *FnTy =
1669       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1670   return CGM.CreateRuntimeFunction(FnTy, Name);
1671 }
1672 
1673 llvm::FunctionCallee
1674 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1675   assert((IVSize == 32 || IVSize == 64) &&
1676          "IV size is not compatible with the omp runtime");
1677   StringRef Name =
1678       IVSize == 32
1679           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1680           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1681   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1682   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1683   llvm::Type *TypeParams[] = {
1684     getIdentTyPointerTy(),                     // loc
1685     CGM.Int32Ty,                               // tid
1686     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1687     PtrTy,                                     // p_lower
1688     PtrTy,                                     // p_upper
1689     PtrTy                                      // p_stride
1690   };
1691   auto *FnTy =
1692       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1693   return CGM.CreateRuntimeFunction(FnTy, Name);
1694 }
1695 
1696 /// Obtain information that uniquely identifies a target entry. This
1697 /// consists of the file and device IDs as well as line number associated with
1698 /// the relevant entry source location.
1699 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1700                                      unsigned &DeviceID, unsigned &FileID,
1701                                      unsigned &LineNum) {
1702   SourceManager &SM = C.getSourceManager();
1703 
1704   // The loc should be always valid and have a file ID (the user cannot use
1705   // #pragma directives in macros)
1706 
1707   assert(Loc.isValid() && "Source location is expected to be always valid.");
1708 
1709   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1710   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1711 
1712   llvm::sys::fs::UniqueID ID;
1713   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1714     SM.getDiagnostics().Report(diag::err_cannot_open_file)
1715         << PLoc.getFilename() << EC.message();
1716 
1717   DeviceID = ID.getDevice();
1718   FileID = ID.getFile();
1719   LineNum = PLoc.getLine();
1720 }
1721 
1722 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1723   if (CGM.getLangOpts().OpenMPSimd)
1724     return Address::invalid();
1725   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1726       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1727   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1728               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1729                HasRequiresUnifiedSharedMemory))) {
1730     SmallString<64> PtrName;
1731     {
1732       llvm::raw_svector_ostream OS(PtrName);
1733       OS << CGM.getMangledName(GlobalDecl(VD));
1734       if (!VD->isExternallyVisible()) {
1735         unsigned DeviceID, FileID, Line;
1736         getTargetEntryUniqueInfo(CGM.getContext(),
1737                                  VD->getCanonicalDecl()->getBeginLoc(),
1738                                  DeviceID, FileID, Line);
1739         OS << llvm::format("_%x", FileID);
1740       }
1741       OS << "_decl_tgt_ref_ptr";
1742     }
1743     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1744     if (!Ptr) {
1745       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1746       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1747                                         PtrName);
1748 
1749       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1750       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1751 
1752       if (!CGM.getLangOpts().OpenMPIsDevice)
1753         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1754       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1755     }
1756     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1757   }
1758   return Address::invalid();
1759 }
1760 
1761 llvm::Constant *
1762 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1763   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1764          !CGM.getContext().getTargetInfo().isTLSSupported());
1765   // Lookup the entry, lazily creating it if necessary.
1766   std::string Suffix = getName({"cache", ""});
1767   return getOrCreateInternalVariable(
1768       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1769 }
1770 
1771 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1772                                                 const VarDecl *VD,
1773                                                 Address VDAddr,
1774                                                 SourceLocation Loc) {
1775   if (CGM.getLangOpts().OpenMPUseTLS &&
1776       CGM.getContext().getTargetInfo().isTLSSupported())
1777     return VDAddr;
1778 
1779   llvm::Type *VarTy = VDAddr.getElementType();
1780   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1781                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1782                                                        CGM.Int8PtrTy),
1783                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1784                          getOrCreateThreadPrivateCache(VD)};
1785   return Address(CGF.EmitRuntimeCall(
1786                      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
1787                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1788                      Args),
1789                  VDAddr.getAlignment());
1790 }
1791 
1792 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1793     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1794     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1795   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1796   // library.
1797   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1798   CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
1799                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1800                       OMPLoc);
1801   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1802   // to register constructor/destructor for variable.
1803   llvm::Value *Args[] = {
1804       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1805       Ctor, CopyCtor, Dtor};
1806   CGF.EmitRuntimeCall(
1807       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
1808           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1809       Args);
1810 }
1811 
/// Emits the constructor/destructor helper functions for the threadprivate
/// variable \p VD and registers them with the OpenMP runtime.
///
/// Nothing is emitted (and nullptr is returned) when threadprivates are
/// implemented with TLS, when \p VD has no definition, when the variable was
/// already handled (tracked by mangled name in ThreadPrivateWithDefinition),
/// or when neither a constructor nor a destructor helper is needed.
///
/// \param VD          The threadprivate variable.
/// \param VDAddr      Address of the original variable.
/// \param Loc         Location used for the generated functions and calls.
/// \param PerformInit If true (and compiling C++), a constructor helper that
///                    re-emits the initializer is generated.
/// \param CGF         If non-null, the registration calls are emitted into
///                    this function and nullptr is returned. If null, a new
///                    "__omp_threadprivate_init_" function containing the
///                    registration calls is created and returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Proceed only for the first request per mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      // NOTE(review): Init is dereferenced below without a null check;
      // presumably PerformInit implies an initializer exists - confirm.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The helper has the shape void *ctor(void *dst).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the argument and store it as the helper's return value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The helper has the shape void dtor(void *dst).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever helper was not generated is passed as a typed null pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No function to emit into: wrap the registration in a dedicated
      // nullary init function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1931 
/// Emits and registers the offload entries for the constructor and destructor
/// of the declare target variable \p VD, whose global is \p Addr.
///
/// When compiling for the device, real "_ctor"/"_dtor" functions operating on
/// \p Addr are generated and marked as used. On the host, private constant i8
/// placeholder globals with the same names are created instead. Either way the
/// entry is registered with OffloadEntriesInfoManager.
///
/// \param PerformInit If true (and compiling C++), the constructor entry is
///                    emitted.
/// \returns false when there are no offload target triples and this is not a
/// device compilation; otherwise the value of LangOpts.OpenMPIsDevice. That
/// value is also returned early for variables that are not declare target,
/// are 'link', are 'to' with unified shared memory, or were already handled.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Handle each variable (by mangled name) only once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    // NOTE(review): the literal ends in '_' and the first format string starts
    // with '_', so the name begins with "__omp_offloading__<device id>" -
    // confirm the double underscore is intended.
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the global variable at Addr.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: only a named placeholder global is created for the entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the global variable
      // at Addr.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: only a named placeholder global is created for the entry.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2046 
2047 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2048                                                           QualType VarType,
2049                                                           StringRef Name) {
2050   std::string Suffix = getName({"artificial", ""});
2051   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2052   llvm::Value *GAddr =
2053       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2054   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
2055       CGM.getTarget().isTLSSupported()) {
2056     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
2057     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
2058   }
2059   std::string CacheSuffix = getName({"cache", ""});
2060   llvm::Value *Args[] = {
2061       emitUpdateLocation(CGF, SourceLocation()),
2062       getThreadID(CGF, SourceLocation()),
2063       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2064       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2065                                 /*isSigned=*/false),
2066       getOrCreateInternalVariable(
2067           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2068   return Address(
2069       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2070           CGF.EmitRuntimeCall(
2071               llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2072                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2073               Args),
2074           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2075       CGM.getContext().getTypeAlignInChars(VarType));
2076 }
2077 
/// Emit code for an OpenMP 'if' clause: run \p ThenGen when \p Cond is true
/// and \p ElseGen otherwise.
///
/// If \p Cond constant-folds to a simple integer, only the selected arm is
/// emitted and no branch is generated. Otherwise a conditional branch to
/// "omp_if.then" / "omp_if.else" is emitted and both arms branch to
/// "omp_if.end".
///
/// \param CGF Function into which the code is emitted.
/// \param Cond Condition of the 'if' clause; dereferenced unconditionally, so
///        it must not be null.
/// \param ThenGen Generator invoked for the true arm.
/// \param ElseGen Generator invoked for the false arm.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code (always emitted on this non-folded path).
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): the object returned by CreateEmpty is a temporary that is
  // destroyed at the end of this statement, before the following EmitBlock.
  // If ApplyDebugLocation restores the previous location in its destructor,
  // this statement has no lasting effect -- confirm the intent.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): same temporary-lifetime caveat as above applies here.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
2116 
/// Emit the code that launches the outlined 'parallel' region \p OutlinedFn.
///
/// Without an 'if' clause (\p IfCond is null) this emits a call to
/// __kmpc_fork_call. With an 'if' clause the fork call is the true arm, and
/// the false arm runs the outlined function directly on the current thread,
/// bracketed by __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
///
/// \param CGF Function into which the code is emitted; nothing is emitted if
///        it has no insertion point.
/// \param Loc Source location used for the runtime location argument.
/// \param OutlinedFn The outlined function of the parallel region.
/// \param CapturedVars Values forwarded to the outlined function after its
///        leading arguments.
/// \param IfCond Condition of the 'if' clause, or null if there is none.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  // The location value is emitted once here and captured by both generators.
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // True arm: hand the outlined function to the runtime.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    // Fixed leading arguments followed by the captured variables.
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // False arm: call the outlined function directly between the serialized
  // parallel begin/end runtime calls.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // First the address of the thread ID, then the address of the
    // zero-initialized bound temporary, then the captured variables.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    // Note: this call emits a fresh location value instead of reusing RTLoc.
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No 'if' clause: only the fork path is emitted.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2179 
2180 // If we're inside an (outlined) parallel region, use the region info's
2181 // thread-ID variable (it is passed in a first argument of the outlined function
2182 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2183 // regular serial code region, get thread ID by calling kmp_int32
2184 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2185 // return the address of that temp.
2186 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2187                                              SourceLocation Loc) {
2188   if (auto *OMPRegionInfo =
2189           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2190     if (OMPRegionInfo->getThreadIDVariable())
2191       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2192 
2193   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2194   QualType Int32Ty =
2195       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2196   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2197   CGF.EmitStoreOfScalar(ThreadID,
2198                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2199 
2200   return ThreadIDTemp;
2201 }
2202 
2203 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2204     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2205   SmallString<256> Buffer;
2206   llvm::raw_svector_ostream Out(Buffer);
2207   Out << Name;
2208   StringRef RuntimeName = Out.str();
2209   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2210   if (Elem.second) {
2211     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2212            "OMP internal variable has different type than requested");
2213     return &*Elem.second;
2214   }
2215 
2216   return Elem.second = new llvm::GlobalVariable(
2217              CGM.getModule(), Ty, /*IsConstant*/ false,
2218              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2219              Elem.first(), /*InsertBefore=*/nullptr,
2220              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2221 }
2222 
2223 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2224   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2225   std::string Name = getName({Prefix, "var"});
2226   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2227 }
2228 
2229 namespace {
2230 /// Common pre(post)-action for different OpenMP constructs.
2231 class CommonActionTy final : public PrePostActionTy {
2232   llvm::FunctionCallee EnterCallee;
2233   ArrayRef<llvm::Value *> EnterArgs;
2234   llvm::FunctionCallee ExitCallee;
2235   ArrayRef<llvm::Value *> ExitArgs;
2236   bool Conditional;
2237   llvm::BasicBlock *ContBlock = nullptr;
2238 
2239 public:
2240   CommonActionTy(llvm::FunctionCallee EnterCallee,
2241                  ArrayRef<llvm::Value *> EnterArgs,
2242                  llvm::FunctionCallee ExitCallee,
2243                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2244       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2245         ExitArgs(ExitArgs), Conditional(Conditional) {}
2246   void Enter(CodeGenFunction &CGF) override {
2247     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2248     if (Conditional) {
2249       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2250       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2251       ContBlock = CGF.createBasicBlock("omp_if.end");
2252       // Generate the branch (If-stmt)
2253       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2254       CGF.EmitBlock(ThenBlock);
2255     }
2256   }
2257   void Done(CodeGenFunction &CGF) {
2258     // Emit the rest of blocks/branches
2259     CGF.EmitBranch(ContBlock);
2260     CGF.EmitBlock(ContBlock, true);
2261   }
2262   void Exit(CodeGenFunction &CGF) override {
2263     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2264   }
2265 };
2266 } // anonymous namespace
2267 
2268 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2269                                          StringRef CriticalName,
2270                                          const RegionCodeGenTy &CriticalOpGen,
2271                                          SourceLocation Loc, const Expr *Hint) {
2272   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2273   // CriticalOpGen();
2274   // __kmpc_end_critical(ident_t *, gtid, Lock);
2275   // Prepare arguments and build a call to __kmpc_critical
2276   if (!CGF.HaveInsertPoint())
2277     return;
2278   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2279                          getCriticalRegionLock(CriticalName)};
2280   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2281                                                 std::end(Args));
2282   if (Hint) {
2283     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2284         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2285   }
2286   CommonActionTy Action(
2287       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2288           CGM.getModule(),
2289           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2290       EnterArgs,
2291       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2292           CGM.getModule(), OMPRTL___kmpc_end_critical),
2293       Args);
2294   CriticalOpGen.setAction(Action);
2295   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2296 }
2297 
2298 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2299                                        const RegionCodeGenTy &MasterOpGen,
2300                                        SourceLocation Loc) {
2301   if (!CGF.HaveInsertPoint())
2302     return;
2303   // if(__kmpc_master(ident_t *, gtid)) {
2304   //   MasterOpGen();
2305   //   __kmpc_end_master(ident_t *, gtid);
2306   // }
2307   // Prepare arguments and build a call to __kmpc_master
2308   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2309   CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2310                             CGM.getModule(), OMPRTL___kmpc_master),
2311                         Args,
2312                         llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2313                             CGM.getModule(), OMPRTL___kmpc_end_master),
2314                         Args,
2315                         /*Conditional=*/true);
2316   MasterOpGen.setAction(Action);
2317   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2318   Action.Done(CGF);
2319 }
2320 
2321 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2322                                         SourceLocation Loc) {
2323   if (!CGF.HaveInsertPoint())
2324     return;
2325   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
2326   if (OMPBuilder) {
2327     OMPBuilder->CreateTaskyield(CGF.Builder);
2328   } else {
2329     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2330     llvm::Value *Args[] = {
2331         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2332         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2333     CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2334                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2335                         Args);
2336   }
2337 
2338   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2339     Region->emitUntiedSwitch(CGF);
2340 }
2341 
2342 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2343                                           const RegionCodeGenTy &TaskgroupOpGen,
2344                                           SourceLocation Loc) {
2345   if (!CGF.HaveInsertPoint())
2346     return;
2347   // __kmpc_taskgroup(ident_t *, gtid);
2348   // TaskgroupOpGen();
2349   // __kmpc_end_taskgroup(ident_t *, gtid);
2350   // Prepare arguments and build a call to __kmpc_taskgroup
2351   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2352   CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2353                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2354                         Args,
2355                         llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2356                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2357                         Args);
2358   TaskgroupOpGen.setAction(Action);
2359   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2360 }
2361 
2362 /// Given an array of pointers to variables, project the address of a
2363 /// given variable.
2364 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2365                                       unsigned Index, const VarDecl *Var) {
2366   // Pull out the pointer to the variable.
2367   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2368   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2369 
2370   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2371   Addr = CGF.Builder.CreateElementBitCast(
2372       Addr, CGF.ConvertTypeForMem(Var->getType()));
2373   return Addr;
2374 }
2375 
/// Build the helper function passed to the runtime for 'copyprivate':
///   void copy_func(void *LHSArg, void *RHSArg);
/// Both arguments are cast to \p ArgsType and treated as arrays of pointers.
/// For every index I below AssignmentOps.size(), the helper copies the
/// variable addressed by RHS[I] into the one addressed by LHS[I] via
/// EmitOMPCopy, using AssignmentOps[I].
///
/// \param CGM Module in which the function is created.
/// \param ArgsType Type both void* arguments are cast to.
/// \param CopyprivateVars Original variable references; each must be a
///        DeclRefExpr and supplies the type used for the copy.
/// \param DestExprs Per-variable DeclRefExprs to VarDecls, used for the
///        LHS (destination) side.
/// \param SrcExprs Per-variable DeclRefExprs to VarDecls, used for the
///        RHS (source) side.
/// \param AssignmentOps Per-variable assignment expressions; its size is the
///        loop bound, so the other arrays must be at least as long.
/// \param Loc Source location used for the generated function.
/// \returns The newly created internal-linkage function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // The function name is produced by the runtime's getName from the parts
  // "omp", "copyprivate" and "copy_func".
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // The body is emitted through a fresh CodeGenFunction for the new function.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    // Destination address comes from slot I of the LHS pointer array.
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    // Source address comes from slot I of the RHS pointer array.
    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The copied type is taken from the original copyprivate variable.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2429 
/// Emit a 'single' region whose body is produced by \p SingleOpGen, plus the
/// copyprivate broadcast when \p CopyprivateVars is non-empty.
///
/// The region body is guarded by the result of __kmpc_single and followed by
/// __kmpc_end_single. With copyprivate variables, a "did_it" flag records
/// whether this thread executed the body, and __kmpc_copyprivate is called
/// with a list of the variables' addresses and a generated copy function.
///
/// \param CGF Function into which the code is emitted; nothing is emitted if
///        it has no insertion point.
/// \param SingleOpGen Generator for the region body.
/// \param Loc Source location used for the runtime calls.
/// \param CopyprivateVars Variables listed in copyprivate clauses (may be
///        empty).
/// \param SrcExprs,DstExprs,AssignmentOps Per-variable helper expressions;
///        asserted to have the same size as \p CopyprivateVars.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is nothing to broadcast; its validity is
  // what gates all copyprivate-specific code below.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  // This store is emitted before Action.Done(), i.e. still inside the block
  // guarded by the __kmpc_single result.
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    // void *cpr_list[<number of copyprivate variables>];
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      // Slot I receives the address of variable I, cast to void*.
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs is passed in the helper's DestExprs position and
    // DstExprs in its SrcExprs position. Check how callers order these two
    // arguments before "fixing" either side -- TODO confirm.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    // Note: this Args intentionally shadows the (loc, gtid) array above.
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2517 
2518 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2519                                         const RegionCodeGenTy &OrderedOpGen,
2520                                         SourceLocation Loc, bool IsThreads) {
2521   if (!CGF.HaveInsertPoint())
2522     return;
2523   // __kmpc_ordered(ident_t *, gtid);
2524   // OrderedOpGen();
2525   // __kmpc_end_ordered(ident_t *, gtid);
2526   // Prepare arguments and build a call to __kmpc_ordered
2527   if (IsThreads) {
2528     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2529     CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2530                               CGM.getModule(), OMPRTL___kmpc_ordered),
2531                           Args,
2532                           llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2533                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2534                           Args);
2535     OrderedOpGen.setAction(Action);
2536     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2537     return;
2538   }
2539   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2540 }
2541 
2542 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2543   unsigned Flags;
2544   if (Kind == OMPD_for)
2545     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2546   else if (Kind == OMPD_sections)
2547     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2548   else if (Kind == OMPD_single)
2549     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2550   else if (Kind == OMPD_barrier)
2551     Flags = OMP_IDENT_BARRIER_EXPL;
2552   else
2553     Flags = OMP_IDENT_BARRIER_IMPL;
2554   return Flags;
2555 }
2556 
2557 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2558     CodeGenFunction &CGF, const OMPLoopDirective &S,
2559     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2560   // Check if the loop directive is actually a doacross loop directive. In this
2561   // case choose static, 1 schedule.
2562   if (llvm::any_of(
2563           S.getClausesOfKind<OMPOrderedClause>(),
2564           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2565     ScheduleKind = OMPC_SCHEDULE_static;
2566     // Chunk size is 1 in this case.
2567     llvm::APInt ChunkSize(32, 1);
2568     ChunkExpr = IntegerLiteral::Create(
2569         CGF.getContext(), ChunkSize,
2570         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2571         SourceLocation());
2572   }
2573 }
2574 
2575 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2576                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2577                                       bool ForceSimpleCall) {
2578   // Check if we should use the OMPBuilder
2579   auto *OMPRegionInfo =
2580       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2581   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
2582   if (OMPBuilder) {
2583     CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
2584         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2585     return;
2586   }
2587 
2588   if (!CGF.HaveInsertPoint())
2589     return;
2590   // Build call __kmpc_cancel_barrier(loc, thread_id);
2591   // Build call __kmpc_barrier(loc, thread_id);
2592   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2593   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2594   // thread_id);
2595   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2596                          getThreadID(CGF, Loc)};
2597   if (OMPRegionInfo) {
2598     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2599       llvm::Value *Result = CGF.EmitRuntimeCall(
2600           llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2601               CGM.getModule(), OMPRTL___kmpc_cancel_barrier),
2602           Args);
2603       if (EmitChecks) {
2604         // if (__kmpc_cancel_barrier()) {
2605         //   exit from construct;
2606         // }
2607         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2608         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2609         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2610         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2611         CGF.EmitBlock(ExitBB);
2612         //   exit from construct;
2613         CodeGenFunction::JumpDest CancelDestination =
2614             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2615         CGF.EmitBranchThroughCleanup(CancelDestination);
2616         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2617       }
2618       return;
2619     }
2620   }
2621   CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2622                           CGM.getModule(), OMPRTL___kmpc_barrier),
2623                       Args);
2624 }
2625 
2626 /// Map the OpenMP loop schedule to the runtime enumeration.
2627 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2628                                           bool Chunked, bool Ordered) {
2629   switch (ScheduleKind) {
2630   case OMPC_SCHEDULE_static:
2631     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2632                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2633   case OMPC_SCHEDULE_dynamic:
2634     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2635   case OMPC_SCHEDULE_guided:
2636     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2637   case OMPC_SCHEDULE_runtime:
2638     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2639   case OMPC_SCHEDULE_auto:
2640     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2641   case OMPC_SCHEDULE_unknown:
2642     assert(!Chunked && "chunk was specified but schedule kind not known");
2643     return Ordered ? OMP_ord_static : OMP_sch_static;
2644   }
2645   llvm_unreachable("Unexpected runtime schedule");
2646 }
2647 
2648 /// Map the OpenMP distribute schedule to the runtime enumeration.
2649 static OpenMPSchedType
2650 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2651   // only static is allowed for dist_schedule
2652   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2653 }
2654 
2655 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2656                                          bool Chunked) const {
2657   OpenMPSchedType Schedule =
2658       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2659   return Schedule == OMP_sch_static;
2660 }
2661 
2662 bool CGOpenMPRuntime::isStaticNonchunked(
2663     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2664   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2665   return Schedule == OMP_dist_sch_static;
2666 }
2667 
2668 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2669                                       bool Chunked) const {
2670   OpenMPSchedType Schedule =
2671       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2672   return Schedule == OMP_sch_static_chunked;
2673 }
2674 
2675 bool CGOpenMPRuntime::isStaticChunked(
2676     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2677   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2678   return Schedule == OMP_dist_sch_static_chunked;
2679 }
2680 
2681 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2682   OpenMPSchedType Schedule =
2683       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2684   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2685   return Schedule != OMP_sch_static;
2686 }
2687 
2688 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2689                                   OpenMPScheduleClauseModifier M1,
2690                                   OpenMPScheduleClauseModifier M2) {
2691   int Modifier = 0;
2692   switch (M1) {
2693   case OMPC_SCHEDULE_MODIFIER_monotonic:
2694     Modifier = OMP_sch_modifier_monotonic;
2695     break;
2696   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2697     Modifier = OMP_sch_modifier_nonmonotonic;
2698     break;
2699   case OMPC_SCHEDULE_MODIFIER_simd:
2700     if (Schedule == OMP_sch_static_chunked)
2701       Schedule = OMP_sch_static_balanced_chunked;
2702     break;
2703   case OMPC_SCHEDULE_MODIFIER_last:
2704   case OMPC_SCHEDULE_MODIFIER_unknown:
2705     break;
2706   }
2707   switch (M2) {
2708   case OMPC_SCHEDULE_MODIFIER_monotonic:
2709     Modifier = OMP_sch_modifier_monotonic;
2710     break;
2711   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2712     Modifier = OMP_sch_modifier_nonmonotonic;
2713     break;
2714   case OMPC_SCHEDULE_MODIFIER_simd:
2715     if (Schedule == OMP_sch_static_chunked)
2716       Schedule = OMP_sch_static_balanced_chunked;
2717     break;
2718   case OMPC_SCHEDULE_MODIFIER_last:
2719   case OMPC_SCHEDULE_MODIFIER_unknown:
2720     break;
2721   }
2722   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2723   // If the static schedule kind is specified or if the ordered clause is
2724   // specified, and if the nonmonotonic modifier is not specified, the effect is
2725   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2726   // modifier is specified, the effect is as if the nonmonotonic modifier is
2727   // specified.
2728   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2729     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2730           Schedule == OMP_sch_static_balanced_chunked ||
2731           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2732           Schedule == OMP_dist_sch_static_chunked ||
2733           Schedule == OMP_dist_sch_static))
2734       Modifier = OMP_sch_modifier_nonmonotonic;
2735   }
2736   return Schedule | Modifier;
2737 }
2738 
2739 void CGOpenMPRuntime::emitForDispatchInit(
2740     CodeGenFunction &CGF, SourceLocation Loc,
2741     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2742     bool Ordered, const DispatchRTInput &DispatchValues) {
2743   if (!CGF.HaveInsertPoint())
2744     return;
2745   OpenMPSchedType Schedule = getRuntimeSchedule(
2746       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2747   assert(Ordered ||
2748          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2749           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2750           Schedule != OMP_sch_static_balanced_chunked));
2751   // Call __kmpc_dispatch_init(
2752   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2753   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2754   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2755 
2756   // If the Chunk was not specified in the clause - use default value 1.
2757   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2758                                             : CGF.Builder.getIntN(IVSize, 1);
2759   llvm::Value *Args[] = {
2760       emitUpdateLocation(CGF, Loc),
2761       getThreadID(CGF, Loc),
2762       CGF.Builder.getInt32(addMonoNonMonoModifier(
2763           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2764       DispatchValues.LB,                                     // Lower
2765       DispatchValues.UB,                                     // Upper
2766       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2767       Chunk                                                  // Chunk
2768   };
2769   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2770 }
2771 
2772 static void emitForStaticInitCall(
2773     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2774     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2775     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2776     const CGOpenMPRuntime::StaticRTInput &Values) {
2777   if (!CGF.HaveInsertPoint())
2778     return;
2779 
2780   assert(!Values.Ordered);
2781   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2782          Schedule == OMP_sch_static_balanced_chunked ||
2783          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2784          Schedule == OMP_dist_sch_static ||
2785          Schedule == OMP_dist_sch_static_chunked);
2786 
2787   // Call __kmpc_for_static_init(
2788   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2789   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2790   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2791   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2792   llvm::Value *Chunk = Values.Chunk;
2793   if (Chunk == nullptr) {
2794     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2795             Schedule == OMP_dist_sch_static) &&
2796            "expected static non-chunked schedule");
2797     // If the Chunk was not specified in the clause - use default value 1.
2798     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2799   } else {
2800     assert((Schedule == OMP_sch_static_chunked ||
2801             Schedule == OMP_sch_static_balanced_chunked ||
2802             Schedule == OMP_ord_static_chunked ||
2803             Schedule == OMP_dist_sch_static_chunked) &&
2804            "expected static chunked schedule");
2805   }
2806   llvm::Value *Args[] = {
2807       UpdateLocation,
2808       ThreadId,
2809       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2810                                                   M2)), // Schedule type
2811       Values.IL.getPointer(),                           // &isLastIter
2812       Values.LB.getPointer(),                           // &LB
2813       Values.UB.getPointer(),                           // &UB
2814       Values.ST.getPointer(),                           // &Stride
2815       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2816       Chunk                                             // Chunk
2817   };
2818   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2819 }
2820 
2821 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2822                                         SourceLocation Loc,
2823                                         OpenMPDirectiveKind DKind,
2824                                         const OpenMPScheduleTy &ScheduleKind,
2825                                         const StaticRTInput &Values) {
2826   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2827       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2828   assert(isOpenMPWorksharingDirective(DKind) &&
2829          "Expected loop-based or sections-based directive.");
2830   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2831                                              isOpenMPLoopDirective(DKind)
2832                                                  ? OMP_IDENT_WORK_LOOP
2833                                                  : OMP_IDENT_WORK_SECTIONS);
2834   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2835   llvm::FunctionCallee StaticInitFunction =
2836       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2837   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2838   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2839                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2840 }
2841 
2842 void CGOpenMPRuntime::emitDistributeStaticInit(
2843     CodeGenFunction &CGF, SourceLocation Loc,
2844     OpenMPDistScheduleClauseKind SchedKind,
2845     const CGOpenMPRuntime::StaticRTInput &Values) {
2846   OpenMPSchedType ScheduleNum =
2847       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2848   llvm::Value *UpdatedLocation =
2849       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2850   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2851   llvm::FunctionCallee StaticInitFunction =
2852       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2853   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2854                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2855                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2856 }
2857 
2858 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2859                                           SourceLocation Loc,
2860                                           OpenMPDirectiveKind DKind) {
2861   if (!CGF.HaveInsertPoint())
2862     return;
2863   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2864   llvm::Value *Args[] = {
2865       emitUpdateLocation(CGF, Loc,
2866                          isOpenMPDistributeDirective(DKind)
2867                              ? OMP_IDENT_WORK_DISTRIBUTE
2868                              : isOpenMPLoopDirective(DKind)
2869                                    ? OMP_IDENT_WORK_LOOP
2870                                    : OMP_IDENT_WORK_SECTIONS),
2871       getThreadID(CGF, Loc)};
2872   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2873   CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2874                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2875                       Args);
2876 }
2877 
2878 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2879                                                  SourceLocation Loc,
2880                                                  unsigned IVSize,
2881                                                  bool IVSigned) {
2882   if (!CGF.HaveInsertPoint())
2883     return;
2884   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2885   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2886   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2887 }
2888 
2889 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2890                                           SourceLocation Loc, unsigned IVSize,
2891                                           bool IVSigned, Address IL,
2892                                           Address LB, Address UB,
2893                                           Address ST) {
2894   // Call __kmpc_dispatch_next(
2895   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2896   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2897   //          kmp_int[32|64] *p_stride);
2898   llvm::Value *Args[] = {
2899       emitUpdateLocation(CGF, Loc),
2900       getThreadID(CGF, Loc),
2901       IL.getPointer(), // &isLastIter
2902       LB.getPointer(), // &Lower
2903       UB.getPointer(), // &Upper
2904       ST.getPointer()  // &Stride
2905   };
2906   llvm::Value *Call =
2907       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2908   return CGF.EmitScalarConversion(
2909       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2910       CGF.getContext().BoolTy, Loc);
2911 }
2912 
2913 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2914                                            llvm::Value *NumThreads,
2915                                            SourceLocation Loc) {
2916   if (!CGF.HaveInsertPoint())
2917     return;
2918   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2919   llvm::Value *Args[] = {
2920       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2921       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2922   CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2923                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2924                       Args);
2925 }
2926 
2927 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2928                                          ProcBindKind ProcBind,
2929                                          SourceLocation Loc) {
2930   if (!CGF.HaveInsertPoint())
2931     return;
2932   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2933   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2934   llvm::Value *Args[] = {
2935       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2936       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2937   CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2938                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2939                       Args);
2940 }
2941 
2942 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2943                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2944   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
2945   if (OMPBuilder) {
2946     OMPBuilder->CreateFlush(CGF.Builder);
2947   } else {
2948     if (!CGF.HaveInsertPoint())
2949       return;
2950     // Build call void __kmpc_flush(ident_t *loc)
2951     CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
2952                             CGM.getModule(), OMPRTL___kmpc_flush),
2953                         emitUpdateLocation(CGF, Loc));
2954   }
2955 }
2956 
namespace {
/// Field indexes for the kmp_task_t type. The explicit values spell out the
/// positions the enumerators already had implicitly.
enum KmpTaskTFields {
  /// Shared variables list.
  KmpTaskTShareds = 0,
  /// Routine executed by the task.
  KmpTaskTRoutine = 1,
  /// Partition id, used for untied tasks.
  KmpTaskTPartId = 2,
  /// Function that calls the destructors of private variables.
  Data1 = 3,
  /// Priority of the task.
  Data2 = 4,
  /// Lower bound (taskloops only).
  KmpTaskTLowerBound = 5,
  /// Upper bound (taskloops only).
  KmpTaskTUpperBound = 6,
  /// Stride (taskloops only).
  KmpTaskTStride = 7,
  /// "Is last iteration" flag (taskloops only).
  KmpTaskTLastIter = 8,
  /// Reduction data (taskloops only).
  KmpTaskTReductions = 9,
};
} // anonymous namespace
2982 
2983 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2984   return OffloadEntriesTargetRegion.empty() &&
2985          OffloadEntriesDeviceGlobalVar.empty();
2986 }
2987 
2988 /// Initialize target region entry.
2989 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2990     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2991                                     StringRef ParentName, unsigned LineNum,
2992                                     unsigned Order) {
2993   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2994                                              "only required for the device "
2995                                              "code generation.");
2996   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2997       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2998                                    OMPTargetRegionEntryTargetRegion);
2999   ++OffloadingEntriesNum;
3000 }
3001 
3002 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3003     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3004                                   StringRef ParentName, unsigned LineNum,
3005                                   llvm::Constant *Addr, llvm::Constant *ID,
3006                                   OMPTargetRegionEntryKind Flags) {
3007   // If we are emitting code for a target, the entry is already initialized,
3008   // only has to be registered.
3009   if (CGM.getLangOpts().OpenMPIsDevice) {
3010     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3011       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3012           DiagnosticsEngine::Error,
3013           "Unable to find target region on line '%0' in the device code.");
3014       CGM.getDiags().Report(DiagID) << LineNum;
3015       return;
3016     }
3017     auto &Entry =
3018         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3019     assert(Entry.isValid() && "Entry not initialized!");
3020     Entry.setAddress(Addr);
3021     Entry.setID(ID);
3022     Entry.setFlags(Flags);
3023   } else {
3024     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3025     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3026     ++OffloadingEntriesNum;
3027   }
3028 }
3029 
3030 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3031     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3032     unsigned LineNum) const {
3033   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3034   if (PerDevice == OffloadEntriesTargetRegion.end())
3035     return false;
3036   auto PerFile = PerDevice->second.find(FileID);
3037   if (PerFile == PerDevice->second.end())
3038     return false;
3039   auto PerParentName = PerFile->second.find(ParentName);
3040   if (PerParentName == PerFile->second.end())
3041     return false;
3042   auto PerLine = PerParentName->second.find(LineNum);
3043   if (PerLine == PerParentName->second.end())
3044     return false;
3045   // Fail if this entry is already registered.
3046   if (PerLine->second.getAddress() || PerLine->second.getID())
3047     return false;
3048   return true;
3049 }
3050 
3051 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3052     const OffloadTargetRegionEntryInfoActTy &Action) {
3053   // Scan all target region entries and perform the provided action.
3054   for (const auto &D : OffloadEntriesTargetRegion)
3055     for (const auto &F : D.second)
3056       for (const auto &P : F.second)
3057         for (const auto &L : P.second)
3058           Action(D.first, F.first, P.first(), L.first, L.second);
3059 }
3060 
3061 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3062     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3063                                        OMPTargetGlobalVarEntryKind Flags,
3064                                        unsigned Order) {
3065   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3066                                              "only required for the device "
3067                                              "code generation.");
3068   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3069   ++OffloadingEntriesNum;
3070 }
3071 
3072 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3073     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3074                                      CharUnits VarSize,
3075                                      OMPTargetGlobalVarEntryKind Flags,
3076                                      llvm::GlobalValue::LinkageTypes Linkage) {
3077   if (CGM.getLangOpts().OpenMPIsDevice) {
3078     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3079     assert(Entry.isValid() && Entry.getFlags() == Flags &&
3080            "Entry not initialized!");
3081     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3082            "Resetting with the new address.");
3083     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3084       if (Entry.getVarSize().isZero()) {
3085         Entry.setVarSize(VarSize);
3086         Entry.setLinkage(Linkage);
3087       }
3088       return;
3089     }
3090     Entry.setVarSize(VarSize);
3091     Entry.setLinkage(Linkage);
3092     Entry.setAddress(Addr);
3093   } else {
3094     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3095       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3096       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3097              "Entry not initialized!");
3098       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3099              "Resetting with the new address.");
3100       if (Entry.getVarSize().isZero()) {
3101         Entry.setVarSize(VarSize);
3102         Entry.setLinkage(Linkage);
3103       }
3104       return;
3105     }
3106     OffloadEntriesDeviceGlobalVar.try_emplace(
3107         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3108     ++OffloadingEntriesNum;
3109   }
3110 }
3111 
3112 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3113     actOnDeviceGlobalVarEntriesInfo(
3114         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3115   // Scan all target region entries and perform the provided action.
3116   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3117     Action(E.getKey(), E.getValue());
3118 }
3119 
3120 void CGOpenMPRuntime::createOffloadEntry(
3121     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3122     llvm::GlobalValue::LinkageTypes Linkage) {
3123   StringRef Name = Addr->getName();
3124   llvm::Module &M = CGM.getModule();
3125   llvm::LLVMContext &C = M.getContext();
3126 
3127   // Create constant string with the name.
3128   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3129 
3130   std::string StringName = getName({"omp_offloading", "entry_name"});
3131   auto *Str = new llvm::GlobalVariable(
3132       M, StrPtrInit->getType(), /*isConstant=*/true,
3133       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3134   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3135 
3136   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
3137                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
3138                             llvm::ConstantInt::get(CGM.SizeTy, Size),
3139                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3140                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3141   std::string EntryName = getName({"omp_offloading", "entry", ""});
3142   llvm::GlobalVariable *Entry = createGlobalStruct(
3143       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3144       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3145 
3146   // The entry has to be created in the section the linker expects it to be.
3147   Entry->setSection("omp_offloading_entries");
3148 }
3149 
3150 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3151   // Emit the offloading entries and metadata so that the device codegen side
3152   // can easily figure out what to emit. The produced metadata looks like
3153   // this:
3154   //
3155   // !omp_offload.info = !{!1, ...}
3156   //
3157   // Right now we only generate metadata for function that contain target
3158   // regions.
3159 
3160   // If we are in simd mode or there are no entries, we don't need to do
3161   // anything.
3162   if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3163     return;
3164 
3165   llvm::Module &M = CGM.getModule();
3166   llvm::LLVMContext &C = M.getContext();
3167   SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3168                          SourceLocation, StringRef>,
3169               16>
3170       OrderedEntries(OffloadEntriesInfoManager.size());
3171   llvm::SmallVector<StringRef, 16> ParentFunctions(
3172       OffloadEntriesInfoManager.size());
3173 
3174   // Auxiliary methods to create metadata values and strings.
3175   auto &&GetMDInt = [this](unsigned V) {
3176     return llvm::ConstantAsMetadata::get(
3177         llvm::ConstantInt::get(CGM.Int32Ty, V));
3178   };
3179 
3180   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3181 
3182   // Create the offloading info metadata node.
3183   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3184 
3185   // Create function that emits metadata for each target region entry;
3186   auto &&TargetRegionMetadataEmitter =
3187       [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3188        &GetMDString](
3189           unsigned DeviceID, unsigned FileID, StringRef ParentName,
3190           unsigned Line,
3191           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3192         // Generate metadata for target regions. Each entry of this metadata
3193         // contains:
3194         // - Entry 0 -> Kind of this type of metadata (0).
3195         // - Entry 1 -> Device ID of the file where the entry was identified.
3196         // - Entry 2 -> File ID of the file where the entry was identified.
3197         // - Entry 3 -> Mangled name of the function where the entry was
3198         // identified.
3199         // - Entry 4 -> Line in the file where the entry was identified.
3200         // - Entry 5 -> Order the entry was created.
3201         // The first element of the metadata node is the kind.
3202         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3203                                  GetMDInt(FileID),      GetMDString(ParentName),
3204                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
3205 
3206         SourceLocation Loc;
3207         for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3208                   E = CGM.getContext().getSourceManager().fileinfo_end();
3209              I != E; ++I) {
3210           if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3211               I->getFirst()->getUniqueID().getFile() == FileID) {
3212             Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3213                 I->getFirst(), Line, 1);
3214             break;
3215           }
3216         }
3217         // Save this entry in the right position of the ordered entries array.
3218         OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3219         ParentFunctions[E.getOrder()] = ParentName;
3220 
3221         // Add metadata to the named metadata node.
3222         MD->addOperand(llvm::MDNode::get(C, Ops));
3223       };
3224 
3225   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3226       TargetRegionMetadataEmitter);
3227 
3228   // Create function that emits metadata for each device global variable entry;
3229   auto &&DeviceGlobalVarMetadataEmitter =
3230       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3231        MD](StringRef MangledName,
3232            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3233                &E) {
3234         // Generate metadata for global variables. Each entry of this metadata
3235         // contains:
3236         // - Entry 0 -> Kind of this type of metadata (1).
3237         // - Entry 1 -> Mangled name of the variable.
3238         // - Entry 2 -> Declare target kind.
3239         // - Entry 3 -> Order the entry was created.
3240         // The first element of the metadata node is the kind.
3241         llvm::Metadata *Ops[] = {
3242             GetMDInt(E.getKind()), GetMDString(MangledName),
3243             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3244 
3245         // Save this entry in the right position of the ordered entries array.
3246         OrderedEntries[E.getOrder()] =
3247             std::make_tuple(&E, SourceLocation(), MangledName);
3248 
3249         // Add metadata to the named metadata node.
3250         MD->addOperand(llvm::MDNode::get(C, Ops));
3251       };
3252 
3253   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3254       DeviceGlobalVarMetadataEmitter);
3255 
3256   for (const auto &E : OrderedEntries) {
3257     assert(std::get<0>(E) && "All ordered entries must exist!");
3258     if (const auto *CE =
3259             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3260                 std::get<0>(E))) {
3261       if (!CE->getID() || !CE->getAddress()) {
3262         // Do not blame the entry if the parent funtion is not emitted.
3263         StringRef FnName = ParentFunctions[CE->getOrder()];
3264         if (!CGM.GetGlobalValue(FnName))
3265           continue;
3266         unsigned DiagID = CGM.getDiags().getCustomDiagID(
3267             DiagnosticsEngine::Error,
3268             "Offloading entry for target region in %0 is incorrect: either the "
3269             "address or the ID is invalid.");
3270         CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3271         continue;
3272       }
3273       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3274                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3275     } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3276                                              OffloadEntryInfoDeviceGlobalVar>(
3277                    std::get<0>(E))) {
3278       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3279           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3280               CE->getFlags());
3281       switch (Flags) {
3282       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3283         if (CGM.getLangOpts().OpenMPIsDevice &&
3284             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3285           continue;
3286         if (!CE->getAddress()) {
3287           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3288               DiagnosticsEngine::Error, "Offloading entry for declare target "
3289                                         "variable %0 is incorrect: the "
3290                                         "address is invalid.");
3291           CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3292           continue;
3293         }
3294         // The vaiable has no definition - no need to add the entry.
3295         if (CE->getVarSize().isZero())
3296           continue;
3297         break;
3298       }
3299       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3300         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3301                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3302                "Declaret target link address is set.");
3303         if (CGM.getLangOpts().OpenMPIsDevice)
3304           continue;
3305         if (!CE->getAddress()) {
3306           unsigned DiagID = CGM.getDiags().getCustomDiagID(
3307               DiagnosticsEngine::Error,
3308               "Offloading entry for declare target variable is incorrect: the "
3309               "address is invalid.");
3310           CGM.getDiags().Report(DiagID);
3311           continue;
3312         }
3313         break;
3314       }
3315       createOffloadEntry(CE->getAddress(), CE->getAddress(),
3316                          CE->getVarSize().getQuantity(), Flags,
3317                          CE->getLinkage());
3318     } else {
3319       llvm_unreachable("Unsupported entry kind.");
3320     }
3321   }
3322 }
3323 
3324 /// Loads all the offload entries information from the host IR
3325 /// metadata.
3326 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3327   // If we are in target mode, load the metadata from the host IR. This code has
3328   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3329 
3330   if (!CGM.getLangOpts().OpenMPIsDevice)
3331     return;
3332 
3333   if (CGM.getLangOpts().OMPHostIRFile.empty())
3334     return;
3335 
3336   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3337   if (auto EC = Buf.getError()) {
3338     CGM.getDiags().Report(diag::err_cannot_open_file)
3339         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3340     return;
3341   }
3342 
3343   llvm::LLVMContext C;
3344   auto ME = expectedToErrorOrAndEmitErrors(
3345       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3346 
3347   if (auto EC = ME.getError()) {
3348     unsigned DiagID = CGM.getDiags().getCustomDiagID(
3349         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3350     CGM.getDiags().Report(DiagID)
3351         << CGM.getLangOpts().OMPHostIRFile << EC.message();
3352     return;
3353   }
3354 
3355   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3356   if (!MD)
3357     return;
3358 
3359   for (llvm::MDNode *MN : MD->operands()) {
3360     auto &&GetMDInt = [MN](unsigned Idx) {
3361       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3362       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3363     };
3364 
3365     auto &&GetMDString = [MN](unsigned Idx) {
3366       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3367       return V->getString();
3368     };
3369 
3370     switch (GetMDInt(0)) {
3371     default:
3372       llvm_unreachable("Unexpected metadata!");
3373       break;
3374     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3375         OffloadingEntryInfoTargetRegion:
3376       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3377           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3378           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3379           /*Order=*/GetMDInt(5));
3380       break;
3381     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3382         OffloadingEntryInfoDeviceGlobalVar:
3383       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3384           /*MangledName=*/GetMDString(1),
3385           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3386               /*Flags=*/GetMDInt(2)),
3387           /*Order=*/GetMDInt(3));
3388       break;
3389     }
3390   }
3391 }
3392 
3393 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3394   if (!KmpRoutineEntryPtrTy) {
3395     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3396     ASTContext &C = CGM.getContext();
3397     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3398     FunctionProtoType::ExtProtoInfo EPI;
3399     KmpRoutineEntryPtrQTy = C.getPointerType(
3400         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3401     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3402   }
3403 }
3404 
3405 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3406   // Make sure the type of the entry is already created. This is the type we
3407   // have to create:
3408   // struct __tgt_offload_entry{
3409   //   void      *addr;       // Pointer to the offload entry info.
3410   //                          // (function or global)
3411   //   char      *name;       // Name of the function or global.
3412   //   size_t     size;       // Size of the entry info (0 if it a function).
3413   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3414   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3415   // };
3416   if (TgtOffloadEntryQTy.isNull()) {
3417     ASTContext &C = CGM.getContext();
3418     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3419     RD->startDefinition();
3420     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3421     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3422     addFieldToRecordDecl(C, RD, C.getSizeType());
3423     addFieldToRecordDecl(
3424         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3425     addFieldToRecordDecl(
3426         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3427     RD->completeDefinition();
3428     RD->addAttr(PackedAttr::CreateImplicit(C));
3429     TgtOffloadEntryQTy = C.getRecordType(RD);
3430   }
3431   return TgtOffloadEntryQTy;
3432 }
3433 
3434 namespace {
3435 struct PrivateHelpersTy {
3436   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3437                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3438       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3439         PrivateElemInit(PrivateElemInit) {}
3440   const Expr *OriginalRef = nullptr;
3441   const VarDecl *Original = nullptr;
3442   const VarDecl *PrivateCopy = nullptr;
3443   const VarDecl *PrivateElemInit = nullptr;
3444 };
3445 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3446 } // anonymous namespace
3447 
3448 static RecordDecl *
3449 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3450   if (!Privates.empty()) {
3451     ASTContext &C = CGM.getContext();
3452     // Build struct .kmp_privates_t. {
3453     //         /*  private vars  */
3454     //       };
3455     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3456     RD->startDefinition();
3457     for (const auto &Pair : Privates) {
3458       const VarDecl *VD = Pair.second.Original;
3459       QualType Type = VD->getType().getNonReferenceType();
3460       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3461       if (VD->hasAttrs()) {
3462         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3463              E(VD->getAttrs().end());
3464              I != E; ++I)
3465           FD->addAttr(*I);
3466       }
3467     }
3468     RD->completeDefinition();
3469     return RD;
3470   }
3471   return nullptr;
3472 }
3473 
3474 static RecordDecl *
3475 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3476                          QualType KmpInt32Ty,
3477                          QualType KmpRoutineEntryPointerQTy) {
3478   ASTContext &C = CGM.getContext();
3479   // Build struct kmp_task_t {
3480   //         void *              shareds;
3481   //         kmp_routine_entry_t routine;
3482   //         kmp_int32           part_id;
3483   //         kmp_cmplrdata_t data1;
3484   //         kmp_cmplrdata_t data2;
3485   // For taskloops additional fields:
3486   //         kmp_uint64          lb;
3487   //         kmp_uint64          ub;
3488   //         kmp_int64           st;
3489   //         kmp_int32           liter;
3490   //         void *              reductions;
3491   //       };
3492   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3493   UD->startDefinition();
3494   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3495   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3496   UD->completeDefinition();
3497   QualType KmpCmplrdataTy = C.getRecordType(UD);
3498   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3499   RD->startDefinition();
3500   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3501   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3502   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3503   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3504   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3505   if (isOpenMPTaskLoopDirective(Kind)) {
3506     QualType KmpUInt64Ty =
3507         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3508     QualType KmpInt64Ty =
3509         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3510     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3511     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3512     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3513     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3514     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3515   }
3516   RD->completeDefinition();
3517   return RD;
3518 }
3519 
3520 static RecordDecl *
3521 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3522                                      ArrayRef<PrivateDataTy> Privates) {
3523   ASTContext &C = CGM.getContext();
3524   // Build struct kmp_task_t_with_privates {
3525   //         kmp_task_t task_data;
3526   //         .kmp_privates_t. privates;
3527   //       };
3528   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3529   RD->startDefinition();
3530   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3531   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3532     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3533   RD->completeDefinition();
3534   return RD;
3535 }
3536 
3537 /// Emit a proxy function which accepts kmp_task_t as the second
3538 /// argument.
3539 /// \code
3540 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3541 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3542 ///   For taskloops:
3543 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3544 ///   tt->reductions, tt->shareds);
3545 ///   return 0;
3546 /// }
3547 /// \endcode
3548 static llvm::Function *
3549 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3550                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3551                       QualType KmpTaskTWithPrivatesPtrQTy,
3552                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3553                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
3554                       llvm::Value *TaskPrivatesMap) {
3555   ASTContext &C = CGM.getContext();
3556   FunctionArgList Args;
3557   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3558                             ImplicitParamDecl::Other);
3559   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3560                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3561                                 ImplicitParamDecl::Other);
3562   Args.push_back(&GtidArg);
3563   Args.push_back(&TaskTypeArg);
3564   const auto &TaskEntryFnInfo =
3565       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3566   llvm::FunctionType *TaskEntryTy =
3567       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3568   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3569   auto *TaskEntry = llvm::Function::Create(
3570       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3571   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3572   TaskEntry->setDoesNotRecurse();
3573   CodeGenFunction CGF(CGM);
3574   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3575                     Loc, Loc);
3576 
3577   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3578   // tt,
3579   // For taskloops:
3580   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3581   // tt->task_data.shareds);
3582   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3583       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3584   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3585       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3586       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3587   const auto *KmpTaskTWithPrivatesQTyRD =
3588       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3589   LValue Base =
3590       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3591   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3592   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3593   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3594   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3595 
3596   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3597   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3598   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3599       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3600       CGF.ConvertTypeForMem(SharedsPtrTy));
3601 
3602   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3603   llvm::Value *PrivatesParam;
3604   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3605     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3606     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3607         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3608   } else {
3609     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3610   }
3611 
3612   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3613                                TaskPrivatesMap,
3614                                CGF.Builder
3615                                    .CreatePointerBitCastOrAddrSpaceCast(
3616                                        TDBase.getAddress(CGF), CGF.VoidPtrTy)
3617                                    .getPointer()};
3618   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3619                                           std::end(CommonArgs));
3620   if (isOpenMPTaskLoopDirective(Kind)) {
3621     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3622     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3623     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3624     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3625     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3626     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3627     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3628     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3629     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3630     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3631     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3632     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3633     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3634     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3635     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3636     CallArgs.push_back(LBParam);
3637     CallArgs.push_back(UBParam);
3638     CallArgs.push_back(StParam);
3639     CallArgs.push_back(LIParam);
3640     CallArgs.push_back(RParam);
3641   }
3642   CallArgs.push_back(SharedsParam);
3643 
3644   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3645                                                   CallArgs);
3646   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3647                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3648   CGF.FinishFunction();
3649   return TaskEntry;
3650 }
3651 
3652 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3653                                             SourceLocation Loc,
3654                                             QualType KmpInt32Ty,
3655                                             QualType KmpTaskTWithPrivatesPtrQTy,
3656                                             QualType KmpTaskTWithPrivatesQTy) {
3657   ASTContext &C = CGM.getContext();
3658   FunctionArgList Args;
3659   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3660                             ImplicitParamDecl::Other);
3661   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3662                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3663                                 ImplicitParamDecl::Other);
3664   Args.push_back(&GtidArg);
3665   Args.push_back(&TaskTypeArg);
3666   const auto &DestructorFnInfo =
3667       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3668   llvm::FunctionType *DestructorFnTy =
3669       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3670   std::string Name =
3671       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3672   auto *DestructorFn =
3673       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3674                              Name, &CGM.getModule());
3675   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3676                                     DestructorFnInfo);
3677   DestructorFn->setDoesNotRecurse();
3678   CodeGenFunction CGF(CGM);
3679   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3680                     Args, Loc, Loc);
3681 
3682   LValue Base = CGF.EmitLoadOfPointerLValue(
3683       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3684       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3685   const auto *KmpTaskTWithPrivatesQTyRD =
3686       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3687   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3688   Base = CGF.EmitLValueForField(Base, *FI);
3689   for (const auto *Field :
3690        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3691     if (QualType::DestructionKind DtorKind =
3692             Field->getType().isDestructedType()) {
3693       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3694       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3695     }
3696   }
3697   CGF.FinishFunction();
3698   return DestructorFn;
3699 }
3700 
3701 /// Emit a privates mapping function for correct handling of private and
3702 /// firstprivate variables.
3703 /// \code
3704 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3705 /// **noalias priv1,...,  <tyn> **noalias privn) {
3706 ///   *priv1 = &.privates.priv1;
3707 ///   ...;
3708 ///   *privn = &.privates.privn;
3709 /// }
3710 /// \endcode
3711 static llvm::Value *
3712 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3713                                ArrayRef<const Expr *> PrivateVars,
3714                                ArrayRef<const Expr *> FirstprivateVars,
3715                                ArrayRef<const Expr *> LastprivateVars,
3716                                QualType PrivatesQTy,
3717                                ArrayRef<PrivateDataTy> Privates) {
3718   ASTContext &C = CGM.getContext();
3719   FunctionArgList Args;
3720   ImplicitParamDecl TaskPrivatesArg(
3721       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3722       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3723       ImplicitParamDecl::Other);
3724   Args.push_back(&TaskPrivatesArg);
3725   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3726   unsigned Counter = 1;
3727   for (const Expr *E : PrivateVars) {
3728     Args.push_back(ImplicitParamDecl::Create(
3729         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3730         C.getPointerType(C.getPointerType(E->getType()))
3731             .withConst()
3732             .withRestrict(),
3733         ImplicitParamDecl::Other));
3734     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3735     PrivateVarsPos[VD] = Counter;
3736     ++Counter;
3737   }
3738   for (const Expr *E : FirstprivateVars) {
3739     Args.push_back(ImplicitParamDecl::Create(
3740         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3741         C.getPointerType(C.getPointerType(E->getType()))
3742             .withConst()
3743             .withRestrict(),
3744         ImplicitParamDecl::Other));
3745     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3746     PrivateVarsPos[VD] = Counter;
3747     ++Counter;
3748   }
3749   for (const Expr *E : LastprivateVars) {
3750     Args.push_back(ImplicitParamDecl::Create(
3751         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3752         C.getPointerType(C.getPointerType(E->getType()))
3753             .withConst()
3754             .withRestrict(),
3755         ImplicitParamDecl::Other));
3756     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3757     PrivateVarsPos[VD] = Counter;
3758     ++Counter;
3759   }
3760   const auto &TaskPrivatesMapFnInfo =
3761       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3762   llvm::FunctionType *TaskPrivatesMapTy =
3763       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3764   std::string Name =
3765       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3766   auto *TaskPrivatesMap = llvm::Function::Create(
3767       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3768       &CGM.getModule());
3769   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3770                                     TaskPrivatesMapFnInfo);
3771   if (CGM.getLangOpts().Optimize) {
3772     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3773     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3774     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3775   }
3776   CodeGenFunction CGF(CGM);
3777   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3778                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3779 
3780   // *privi = &.privates.privi;
3781   LValue Base = CGF.EmitLoadOfPointerLValue(
3782       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3783       TaskPrivatesArg.getType()->castAs<PointerType>());
3784   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3785   Counter = 0;
3786   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3787     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3788     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3789     LValue RefLVal =
3790         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3791     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3792         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3793     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3794     ++Counter;
3795   }
3796   CGF.FinishFunction();
3797   return TaskPrivatesMap;
3798 }
3799 
3800 /// Emit initialization for private variables in task-based directives.
3801 static void emitPrivatesInit(CodeGenFunction &CGF,
3802                              const OMPExecutableDirective &D,
3803                              Address KmpTaskSharedsPtr, LValue TDBase,
3804                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3805                              QualType SharedsTy, QualType SharedsPtrTy,
3806                              const OMPTaskDataTy &Data,
3807                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3808   ASTContext &C = CGF.getContext();
3809   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3810   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3811   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3812                                  ? OMPD_taskloop
3813                                  : OMPD_task;
3814   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3815   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3816   LValue SrcBase;
3817   bool IsTargetTask =
3818       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3819       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3820   // For target-based directives skip 3 firstprivate arrays BasePointersArray,
3821   // PointersArray and SizesArray. The original variables for these arrays are
3822   // not captured and we get their addresses explicitly.
3823   if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3824       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3825     SrcBase = CGF.MakeAddrLValue(
3826         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3827             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
3828         SharedsTy);
3829   }
3830   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3831   for (const PrivateDataTy &Pair : Privates) {
3832     const VarDecl *VD = Pair.second.PrivateCopy;
3833     const Expr *Init = VD->getAnyInitializer();
3834     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3835                              !CGF.isTrivialInitializer(Init)))) {
3836       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3837       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3838         const VarDecl *OriginalVD = Pair.second.Original;
3839         // Check if the variable is the target-based BasePointersArray,
3840         // PointersArray or SizesArray.
3841         LValue SharedRefLValue;
3842         QualType Type = PrivateLValue.getType();
3843         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3844         if (IsTargetTask && !SharedField) {
3845           assert(isa<ImplicitParamDecl>(OriginalVD) &&
3846                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3847                  cast<CapturedDecl>(OriginalVD->getDeclContext())
3848                          ->getNumParams() == 0 &&
3849                  isa<TranslationUnitDecl>(
3850                      cast<CapturedDecl>(OriginalVD->getDeclContext())
3851                          ->getDeclContext()) &&
3852                  "Expected artificial target data variable.");
3853           SharedRefLValue =
3854               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3855         } else if (ForDup) {
3856           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3857           SharedRefLValue = CGF.MakeAddrLValue(
3858               Address(SharedRefLValue.getPointer(CGF),
3859                       C.getDeclAlign(OriginalVD)),
3860               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3861               SharedRefLValue.getTBAAInfo());
3862         } else if (CGF.LambdaCaptureFields.count(
3863                        Pair.second.Original->getCanonicalDecl()) > 0 ||
3864                    dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
3865           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3866         } else {
3867           // Processing for implicitly captured variables.
3868           InlinedOpenMPRegionRAII Region(
3869               CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3870               /*HasCancel=*/false);
3871           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3872         }
3873         if (Type->isArrayType()) {
3874           // Initialize firstprivate array.
3875           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3876             // Perform simple memcpy.
3877             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3878           } else {
3879             // Initialize firstprivate array using element-by-element
3880             // initialization.
3881             CGF.EmitOMPAggregateAssign(
3882                 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3883                 Type,
3884                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3885                                                   Address SrcElement) {
3886                   // Clean up any temporaries needed by the initialization.
3887                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
3888                   InitScope.addPrivate(
3889                       Elem, [SrcElement]() -> Address { return SrcElement; });
3890                   (void)InitScope.Privatize();
3891                   // Emit initialization for single element.
3892                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3893                       CGF, &CapturesInfo);
3894                   CGF.EmitAnyExprToMem(Init, DestElement,
3895                                        Init->getType().getQualifiers(),
3896                                        /*IsInitializer=*/false);
3897                 });
3898           }
3899         } else {
3900           CodeGenFunction::OMPPrivateScope InitScope(CGF);
3901           InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
3902             return SharedRefLValue.getAddress(CGF);
3903           });
3904           (void)InitScope.Privatize();
3905           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3906           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3907                              /*capturedByInit=*/false);
3908         }
3909       } else {
3910         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3911       }
3912     }
3913     ++FI;
3914   }
3915 }
3916 
3917 /// Check if duplication function is required for taskloops.
3918 static bool checkInitIsRequired(CodeGenFunction &CGF,
3919                                 ArrayRef<PrivateDataTy> Privates) {
3920   bool InitRequired = false;
3921   for (const PrivateDataTy &Pair : Privates) {
3922     const VarDecl *VD = Pair.second.PrivateCopy;
3923     const Expr *Init = VD->getAnyInitializer();
3924     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3925                                     !CGF.isTrivialInitializer(Init));
3926     if (InitRequired)
3927       break;
3928   }
3929   return InitRequired;
3930 }
3931 
3932 
3933 /// Emit task_dup function (for initialization of
3934 /// private/firstprivate/lastprivate vars and last_iter flag)
3935 /// \code
3936 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3937 /// lastpriv) {
3938 /// // setup lastprivate flag
3939 ///    task_dst->last = lastpriv;
3940 /// // could be constructor calls here...
3941 /// }
3942 /// \endcode
3943 static llvm::Value *
3944 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3945                     const OMPExecutableDirective &D,
3946                     QualType KmpTaskTWithPrivatesPtrQTy,
3947                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3948                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3949                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3950                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3951   ASTContext &C = CGM.getContext();
3952   FunctionArgList Args;
3953   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3954                            KmpTaskTWithPrivatesPtrQTy,
3955                            ImplicitParamDecl::Other);
3956   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3957                            KmpTaskTWithPrivatesPtrQTy,
3958                            ImplicitParamDecl::Other);
3959   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3960                                 ImplicitParamDecl::Other);
3961   Args.push_back(&DstArg);
3962   Args.push_back(&SrcArg);
3963   Args.push_back(&LastprivArg);
3964   const auto &TaskDupFnInfo =
3965       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3966   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3967   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3968   auto *TaskDup = llvm::Function::Create(
3969       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3970   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3971   TaskDup->setDoesNotRecurse();
3972   CodeGenFunction CGF(CGM);
3973   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3974                     Loc);
3975 
3976   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3977       CGF.GetAddrOfLocalVar(&DstArg),
3978       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3979   // task_dst->liter = lastpriv;
3980   if (WithLastIter) {
3981     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3982     LValue Base = CGF.EmitLValueForField(
3983         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3984     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3985     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3986         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3987     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3988   }
3989 
3990   // Emit initial values for private copies (if any).
3991   assert(!Privates.empty());
3992   Address KmpTaskSharedsPtr = Address::invalid();
3993   if (!Data.FirstprivateVars.empty()) {
3994     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3995         CGF.GetAddrOfLocalVar(&SrcArg),
3996         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3997     LValue Base = CGF.EmitLValueForField(
3998         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3999     KmpTaskSharedsPtr = Address(
4000         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4001                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4002                                                   KmpTaskTShareds)),
4003                              Loc),
4004         CGM.getNaturalTypeAlignment(SharedsTy));
4005   }
4006   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4007                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4008   CGF.FinishFunction();
4009   return TaskDup;
4010 }
4011 
4012 /// Checks if destructor function is required to be generated.
4013 /// \return true if cleanups are required, false otherwise.
4014 static bool
4015 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4016   bool NeedsCleanup = false;
4017   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4018   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4019   for (const FieldDecl *FD : PrivateRD->fields()) {
4020     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4021     if (NeedsCleanup)
4022       break;
4023   }
4024   return NeedsCleanup;
4025 }
4026 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// The constructor opens one loop per iterator of the given expression
/// (counter initialization, "iter.cont" condition block and "iter.body"
/// block); the destructor closes these loops in reverse order (counter
/// update, branch back to "iter.cont" and the "iter.exit" block). Code emitted
/// while an object of this class is alive therefore lands in the body of the
/// innermost loop. If the iterator expression is null, nothing is emitted.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  /// Iterator expression the loops are generated for; may be null.
  const OMPIteratorExpr *E = nullptr;
  /// Jump destinations of the "iter.cont" blocks, one per iterator.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  /// Jump destinations of the "iter.exit" blocks, one per iterator.
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  // Default and copy construction are disabled.
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Emits the loop headers for all iterators of \p E (if it is not null).
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Upper bounds are evaluated up front, before the iterator and counter
    // variables are privatized.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      // Private copy for the iterator variable itself.
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      // Private copy for the helper counter variable.
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Open the loops, outermost iterator first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      // Remember the destinations; the destructor needs them to close the
      // loop.
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The signedness of the comparison follows the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loops opened by the constructor, innermost iterator first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      // Only the exit block of the outermost loop (I == 1) is emitted as
      // finished.
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4105 
4106 static std::pair<llvm::Value *, llvm::Value *>
4107 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4108   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4109   llvm::Value *Addr;
4110   if (OASE) {
4111     const Expr *Base = OASE->getBase();
4112     Addr = CGF.EmitScalarExpr(Base);
4113   } else {
4114     Addr = CGF.EmitLValue(E).getPointer(CGF);
4115   }
4116   llvm::Value *SizeVal;
4117   QualType Ty = E->getType();
4118   if (OASE) {
4119     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4120     for (const Expr *SE : OASE->getDimensions()) {
4121       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4122       Sz = CGF.EmitScalarConversion(
4123           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4124       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4125     }
4126   } else if (const auto *ASE =
4127                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4128     LValue UpAddrLVal =
4129         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4130     llvm::Value *UpAddr =
4131         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4132     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4133     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4134     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4135   } else {
4136     SizeVal = CGF.getTypeSize(Ty);
4137   }
4138   return std::make_pair(Addr, SizeVal);
4139 }
4140 
4141 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4142 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4143   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4144   if (KmpTaskAffinityInfoTy.isNull()) {
4145     RecordDecl *KmpAffinityInfoRD =
4146         C.buildImplicitRecord("kmp_task_affinity_info_t");
4147     KmpAffinityInfoRD->startDefinition();
4148     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4149     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4150     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4151     KmpAffinityInfoRD->completeDefinition();
4152     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4153   }
4154 }
4155 
/// Emits the code that allocates and initializes the task descriptor for the
/// directive \p D.
///
/// The function performs the following steps:
///  - collects the private, firstprivate and lastprivate copies from \p Data
///    and sorts them by decreasing alignment;
///  - builds (or reuses) the kmp_task_t record type and the per-task record
///    that combines it with the privates;
///  - emits the task privates mapping function (if there are privates) and the
///    proxy task entry;
///  - emits the call to __kmpc_omp_task_alloc, or to
///    __kmpc_omp_target_task_alloc if the directive has a 'nowait' clause;
///  - handles 'detach' and 'affinity' clauses;
///  - copies the shareds, initializes the privates and stores the destructors
///    function and the priority in the task descriptor when required.
///
/// \param Loc Location used for the emitted runtime calls and loads.
/// \param TaskFunction Outlined task function; the type of its 4th argument is
/// used as the type of the task privates map.
/// \param SharedsTy Type of the record with the shared variables.
/// \param Shareds Address of the record with the shared variables.
/// \return the allocated task, the proxy task entry, the task pointer cast to
/// the per-task record type, the lvalue of the embedded kmp_task_t, its record
/// declaration and, if one was emitted, the task duplication function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // The *Copies (and *Inits) lists are walked in lockstep with the
  // corresponding *Vars lists.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Largest alignment first; the stable sort keeps the relative order of
  // privates with equal alignment.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives and all other directives use separately cached record
  // types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The map is passed as the 4th argument of the outlined task function, so
  // its type is taken from there.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null map.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  // Flags known at compile time are accumulated in 'Flags'; the 'final' flag
  // may depend on a runtime condition and is merged in below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // If Data.Final carries a value, the flag is selected at runtime; otherwise
  // its integer part decides at compile time.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a 'nowait' clause the target-task allocation entry point is used; it
  // takes the device id as an additional trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    // Note: this 'C' is the clause and shadows the ASTContext 'C' above.
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    // Note: this 'Loc' is the emitted ident_t value for the clause location
    // and shadows the SourceLocation parameter 'Loc' within this block.
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    // Convert the returned pointer to the type of the event handler variable
    // and store it there.
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumAffinities counts the items of clauses without an iterator modifier
    // (known at compile time); NumOfElements is the runtime part computed from
    // the upper bounds of the iterators.
    // NOTE(review): the running product in NumOfElements is shared by all
    // clauses with iterators and is not scaled by the clause's varlist size -
    // confirm it matches the number of entries written below.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // At least one clause has an iterator: the array size is known only at
      // runtime, so a variable-length array is emitted.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      // The runtime call below takes the number of elements as a 32-bit value.
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // No iterators: a constant-size temporary array is sufficient.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    // Only the base_addr and len fields are written; the flags field is not
    // stored by this function.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Entries produced inside iterator loops need a position that is tracked
    // at runtime; it starts right after the statically filled entries.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      // The scope opens the iterator loops here and closes them at the end of
      // this loop iteration, so the stores below are emitted in the loop body.
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // Advance the runtime position to the next free entry.
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task as the per-task record (kmp_task_t + privates).
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  // The first field of the per-task record is the kmp_task_t itself.
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    // The destination is the pointer loaded from the shareds field of the
    // task descriptor.
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task duplication function is emitted only for taskloop directives
    // that have lastprivates or privates that require initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    // The priority lives in the Data2 field; the union declaration obtained
    // from the Data1 field is used to locate the member inside it.
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4538 
namespace {
/// Dependence kinds in the encoding passed to the runtime library.
enum RTLDependenceKindTy {
  DepIn = 0x1,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
};
/// Indices of the fields in the kmp_depend_info record.
enum RTLDependInfoFieldsTy {
  BaseAddr,
  Len,
  Flags,
};
} // namespace
4549 
4550 /// Translates internal dependency kind into the runtime kind.
4551 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4552   RTLDependenceKindTy DepKind;
4553   switch (K) {
4554   case OMPC_DEPEND_in:
4555     DepKind = DepIn;
4556     break;
4557   // Out and InOut dependencies must use the same code.
4558   case OMPC_DEPEND_out:
4559   case OMPC_DEPEND_inout:
4560     DepKind = DepInOut;
4561     break;
4562   case OMPC_DEPEND_mutexinoutset:
4563     DepKind = DepMutexInOutSet;
4564     break;
4565   case OMPC_DEPEND_source:
4566   case OMPC_DEPEND_sink:
4567   case OMPC_DEPEND_depobj:
4568   case OMPC_DEPEND_unknown:
4569     llvm_unreachable("Unknown task dependence type");
4570   }
4571   return DepKind;
4572 }
4573 
4574 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4575 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4576                            QualType &FlagsTy) {
4577   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4578   if (KmpDependInfoTy.isNull()) {
4579     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4580     KmpDependInfoRD->startDefinition();
4581     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4582     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4583     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4584     KmpDependInfoRD->completeDefinition();
4585     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4586   }
4587 }
4588 
4589 std::pair<llvm::Value *, LValue>
4590 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4591                                    SourceLocation Loc) {
4592   ASTContext &C = CGM.getContext();
4593   QualType FlagsTy;
4594   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4595   RecordDecl *KmpDependInfoRD =
4596       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4597   LValue Base = CGF.EmitLoadOfPointerLValue(
4598       DepobjLVal.getAddress(CGF),
4599       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4600   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4601   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4602           Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4603   Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4604                             Base.getTBAAInfo());
4605   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4606       Addr.getPointer(),
4607       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4608   LValue NumDepsBase = CGF.MakeAddrLValue(
4609       Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4610       Base.getBaseInfo(), Base.getTBAAInfo());
4611   // NumDeps = deps[i].base_addr;
4612   LValue BaseAddrLVal = CGF.EmitLValueForField(
4613       NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4614   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4615   return std::make_pair(NumDeps, Base);
4616 }
4617 
4618 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4619                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4620                            const OMPTaskDataTy::DependData &Data,
4621                            Address DependenciesArray) {
4622   CodeGenModule &CGM = CGF.CGM;
4623   ASTContext &C = CGM.getContext();
4624   QualType FlagsTy;
4625   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4626   RecordDecl *KmpDependInfoRD =
4627       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4628   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4629 
4630   OMPIteratorGeneratorScope IteratorScope(
4631       CGF, cast_or_null<OMPIteratorExpr>(
4632                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4633                                  : nullptr));
4634   for (const Expr *E : Data.DepExprs) {
4635     llvm::Value *Addr;
4636     llvm::Value *Size;
4637     std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4638     LValue Base;
4639     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4640       Base = CGF.MakeAddrLValue(
4641           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4642     } else {
4643       LValue &PosLVal = *Pos.get<LValue *>();
4644       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4645       Base = CGF.MakeAddrLValue(
4646           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
4647                   DependenciesArray.getAlignment()),
4648           KmpDependInfoTy);
4649     }
4650     // deps[i].base_addr = &<Dependencies[i].second>;
4651     LValue BaseAddrLVal = CGF.EmitLValueForField(
4652         Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4653     CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4654                           BaseAddrLVal);
4655     // deps[i].len = sizeof(<Dependencies[i].second>);
4656     LValue LenLVal = CGF.EmitLValueForField(
4657         Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4658     CGF.EmitStoreOfScalar(Size, LenLVal);
4659     // deps[i].flags = <Dependencies[i].first>;
4660     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4661     LValue FlagsLVal = CGF.EmitLValueForField(
4662         Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4663     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4664                           FlagsLVal);
4665     if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4666       ++(*P);
4667     } else {
4668       LValue &PosLVal = *Pos.get<LValue *>();
4669       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4670       Idx = CGF.Builder.CreateNUWAdd(Idx,
4671                                      llvm::ConstantInt::get(Idx->getType(), 1));
4672       CGF.EmitStoreOfScalar(Idx, PosLVal);
4673     }
4674   }
4675 }
4676 
4677 static SmallVector<llvm::Value *, 4>
4678 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4679                         const OMPTaskDataTy::DependData &Data) {
4680   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4681          "Expected depobj dependecy kind.");
4682   SmallVector<llvm::Value *, 4> Sizes;
4683   SmallVector<LValue, 4> SizeLVals;
4684   ASTContext &C = CGF.getContext();
4685   QualType FlagsTy;
4686   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4687   RecordDecl *KmpDependInfoRD =
4688       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4689   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4690   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4691   {
4692     OMPIteratorGeneratorScope IteratorScope(
4693         CGF, cast_or_null<OMPIteratorExpr>(
4694                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4695                                    : nullptr));
4696     for (const Expr *E : Data.DepExprs) {
4697       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4698       LValue Base = CGF.EmitLoadOfPointerLValue(
4699           DepobjLVal.getAddress(CGF),
4700           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4701       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4702           Base.getAddress(CGF), KmpDependInfoPtrT);
4703       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4704                                 Base.getTBAAInfo());
4705       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4706           Addr.getPointer(),
4707           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4708       LValue NumDepsBase = CGF.MakeAddrLValue(
4709           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4710           Base.getBaseInfo(), Base.getTBAAInfo());
4711       // NumDeps = deps[i].base_addr;
4712       LValue BaseAddrLVal = CGF.EmitLValueForField(
4713           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4714       llvm::Value *NumDeps =
4715           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4716       LValue NumLVal = CGF.MakeAddrLValue(
4717           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4718           C.getUIntPtrType());
4719       CGF.InitTempAlloca(NumLVal.getAddress(CGF),
4720                          llvm::ConstantInt::get(CGF.IntPtrTy, 0));
4721       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4722       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4723       CGF.EmitStoreOfScalar(Add, NumLVal);
4724       SizeLVals.push_back(NumLVal);
4725     }
4726   }
4727   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4728     llvm::Value *Size =
4729         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4730     Sizes.push_back(Size);
4731   }
4732   return Sizes;
4733 }
4734 
4735 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4736                                LValue PosLVal,
4737                                const OMPTaskDataTy::DependData &Data,
4738                                Address DependenciesArray) {
4739   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4740          "Expected depobj dependecy kind.");
4741   ASTContext &C = CGF.getContext();
4742   QualType FlagsTy;
4743   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4744   RecordDecl *KmpDependInfoRD =
4745       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4746   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4747   llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4748   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4749   {
4750     OMPIteratorGeneratorScope IteratorScope(
4751         CGF, cast_or_null<OMPIteratorExpr>(
4752                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4753                                    : nullptr));
4754     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4755       const Expr *E = Data.DepExprs[I];
4756       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4757       LValue Base = CGF.EmitLoadOfPointerLValue(
4758           DepobjLVal.getAddress(CGF),
4759           C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4760       Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4761           Base.getAddress(CGF), KmpDependInfoPtrT);
4762       Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4763                                 Base.getTBAAInfo());
4764 
4765       // Get number of elements in a single depobj.
4766       llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4767           Addr.getPointer(),
4768           llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4769       LValue NumDepsBase = CGF.MakeAddrLValue(
4770           Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4771           Base.getBaseInfo(), Base.getTBAAInfo());
4772       // NumDeps = deps[i].base_addr;
4773       LValue BaseAddrLVal = CGF.EmitLValueForField(
4774           NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4775       llvm::Value *NumDeps =
4776           CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4777 
4778       // memcopy dependency data.
4779       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4780           ElSize,
4781           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4782       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4783       Address DepAddr =
4784           Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
4785                   DependenciesArray.getAlignment());
4786       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4787 
4788       // Increase pos.
4789       // pos += size;
4790       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4791       CGF.EmitStoreOfScalar(Add, PosLVal);
4792     }
4793   }
4794 }
4795 
4796 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4797     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4798     SourceLocation Loc) {
4799   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4800         return D.DepExprs.empty();
4801       }))
4802     return std::make_pair(nullptr, Address::invalid());
4803   // Process list of dependencies.
4804   ASTContext &C = CGM.getContext();
4805   Address DependenciesArray = Address::invalid();
4806   llvm::Value *NumOfElements = nullptr;
4807   unsigned NumDependencies = std::accumulate(
4808       Dependencies.begin(), Dependencies.end(), 0,
4809       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4810         return D.DepKind == OMPC_DEPEND_depobj
4811                    ? V
4812                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4813       });
4814   QualType FlagsTy;
4815   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4816   bool HasDepobjDeps = false;
4817   bool HasRegularWithIterators = false;
4818   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4819   llvm::Value *NumOfRegularWithIterators =
4820       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4821   // Calculate number of depobj dependecies and regular deps with the iterators.
4822   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4823     if (D.DepKind == OMPC_DEPEND_depobj) {
4824       SmallVector<llvm::Value *, 4> Sizes =
4825           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4826       for (llvm::Value *Size : Sizes) {
4827         NumOfDepobjElements =
4828             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4829       }
4830       HasDepobjDeps = true;
4831       continue;
4832     }
4833     // Include number of iterations, if any.
4834     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4835       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4836         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4837         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4838         NumOfRegularWithIterators =
4839             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4840       }
4841       HasRegularWithIterators = true;
4842       continue;
4843     }
4844   }
4845 
4846   QualType KmpDependInfoArrayTy;
4847   if (HasDepobjDeps || HasRegularWithIterators) {
4848     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4849                                            /*isSigned=*/false);
4850     if (HasDepobjDeps) {
4851       NumOfElements =
4852           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4853     }
4854     if (HasRegularWithIterators) {
4855       NumOfElements =
4856           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4857     }
4858     OpaqueValueExpr OVE(Loc,
4859                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4860                         VK_RValue);
4861     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4862                                                   RValue::get(NumOfElements));
4863     KmpDependInfoArrayTy =
4864         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4865                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4866     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4867     // Properly emit variable-sized array.
4868     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4869                                          ImplicitParamDecl::Other);
4870     CGF.EmitVarDecl(*PD);
4871     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4872     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4873                                               /*isSigned=*/false);
4874   } else {
4875     KmpDependInfoArrayTy = C.getConstantArrayType(
4876         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4877         ArrayType::Normal, /*IndexTypeQuals=*/0);
4878     DependenciesArray =
4879         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4880     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4881     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4882                                            /*isSigned=*/false);
4883   }
4884   unsigned Pos = 0;
4885   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4886     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4887         Dependencies[I].IteratorExpr)
4888       continue;
4889     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4890                    DependenciesArray);
4891   }
4892   // Copy regular dependecies with iterators.
4893   LValue PosLVal = CGF.MakeAddrLValue(
4894       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4895   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4896   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4897     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4898         !Dependencies[I].IteratorExpr)
4899       continue;
4900     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4901                    DependenciesArray);
4902   }
4903   // Copy final depobj arrays without iterators.
4904   if (HasDepobjDeps) {
4905     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4906       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4907         continue;
4908       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4909                          DependenciesArray);
4910     }
4911   }
4912   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4913       DependenciesArray, CGF.VoidPtrTy);
4914   return std::make_pair(NumOfElements, DependenciesArray);
4915 }
4916 
4917 Address CGOpenMPRuntime::emitDepobjDependClause(
4918     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4919     SourceLocation Loc) {
4920   if (Dependencies.DepExprs.empty())
4921     return Address::invalid();
4922   // Process list of dependencies.
4923   ASTContext &C = CGM.getContext();
4924   Address DependenciesArray = Address::invalid();
4925   unsigned NumDependencies = Dependencies.DepExprs.size();
4926   QualType FlagsTy;
4927   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4928   RecordDecl *KmpDependInfoRD =
4929       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4930 
4931   llvm::Value *Size;
4932   // Define type kmp_depend_info[<Dependencies.size()>];
4933   // For depobj reserve one extra element to store the number of elements.
4934   // It is required to handle depobj(x) update(in) construct.
4935   // kmp_depend_info[<Dependencies.size()>] deps;
4936   llvm::Value *NumDepsVal;
4937   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4938   if (const auto *IE =
4939           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4940     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4941     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4942       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4943       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4944       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4945     }
4946     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4947                                     NumDepsVal);
4948     CharUnits SizeInBytes =
4949         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4950     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4951     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4952     NumDepsVal =
4953         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4954   } else {
4955     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4956         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4957         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4958     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4959     Size = CGM.getSize(Sz.alignTo(Align));
4960     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4961   }
4962   // Need to allocate on the dynamic memory.
4963   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4964   // Use default allocator.
4965   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4966   llvm::Value *Args[] = {ThreadID, Size, Allocator};
4967 
4968   llvm::Value *Addr =
4969       CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
4970                               CGM.getModule(), OMPRTL___kmpc_alloc),
4971                           Args, ".dep.arr.addr");
4972   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4973       Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
4974   DependenciesArray = Address(Addr, Align);
4975   // Write number of elements in the first element of array for depobj.
4976   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4977   // deps[i].base_addr = NumDependencies;
4978   LValue BaseAddrLVal = CGF.EmitLValueForField(
4979       Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4980   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4981   llvm::PointerUnion<unsigned *, LValue *> Pos;
4982   unsigned Idx = 1;
4983   LValue PosLVal;
4984   if (Dependencies.IteratorExpr) {
4985     PosLVal = CGF.MakeAddrLValue(
4986         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4987         C.getSizeType());
4988     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4989                           /*IsInit=*/true);
4990     Pos = &PosLVal;
4991   } else {
4992     Pos = &Idx;
4993   }
4994   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4995   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4996       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
4997   return DependenciesArray;
4998 }
4999 
5000 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5001                                         SourceLocation Loc) {
5002   ASTContext &C = CGM.getContext();
5003   QualType FlagsTy;
5004   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5005   LValue Base = CGF.EmitLoadOfPointerLValue(
5006       DepobjLVal.getAddress(CGF),
5007       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5008   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5009   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5010       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5011   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5012       Addr.getPointer(),
5013       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5014   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5015                                                                CGF.VoidPtrTy);
5016   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5017   // Use default allocator.
5018   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5019   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5020 
5021   // _kmpc_free(gtid, addr, nullptr);
5022   (void)CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
5023                                 CGM.getModule(), OMPRTL___kmpc_free),
5024                             Args);
5025 }
5026 
/// Emit a loop that overwrites the flags field of every dependency record of
/// a depobj object with the runtime value for \p NewDepKind.
///
/// \param CGF Function being emitted.
/// \param DepobjLVal LValue of the depobj variable.
/// \param NewDepKind Dependence kind to store into each record.
/// \param Loc Location passed on to getDepobjElements().
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  // NumDeps is used below as the number of records and Base as the first
  // record to update.
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // End = Begin + NumDeps, i.e. one element past the last record.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The loop body is entered unconditionally from the current block and the
  // exit test is emitted at its end, so the body runs at least once.
  // NOTE(review): if NumDeps can be zero at run time (e.g. an iterator with an
  // empty range) the exit test below would never see End - confirm whether an
  // entry guard is needed.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // The PHI carries the current record: Begin on entry, the next record on
  // the back edge.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  // Leave the loop once the next element is the end of the array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5072 
/// Emit the runtime calls that start a task created by emitTaskInit().
///
/// Two code paths are prepared. The "then" path enqueues the task with
/// __kmpc_omp_task_with_deps (if \p Data has dependences) or __kmpc_omp_task.
/// The "else" path optionally calls __kmpc_omp_wait_deps and then invokes the
/// task entry directly, with __kmpc_omp_task_begin_if0 and
/// __kmpc_omp_task_complete_if0 registered as the region's action. With an
/// \p IfCond both paths are emitted through emitIfClause(); without one only
/// the "then" path is emitted.
///
/// \param CGF Function being emitted; nothing is emitted without an insert
/// point.
/// \param Loc Location used for runtime calls.
/// \param D Directive forwarded to emitTaskInit().
/// \param TaskFunction Outlined task function forwarded to emitTaskInit().
/// \param SharedsTy Type of the shareds record forwarded to emitTaskInit().
/// \param Shareds Address of the shareds record forwarded to emitTaskInit().
/// \param IfCond Condition of the 'if' clause, or null.
/// \param Data Task data (tiedness, dependences, ...).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // Copy the pieces of the result that the lambdas below capture by value.
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // The argument array is filled in only when there are dependences; it is
  // read under the same condition in ThenCodeGen.
  // NOTE(review): emitDependClause() returns an invalid address when every
  // entry has an empty DepExprs list, while the guard here only checks that
  // Data.Dependences is non-empty - confirm such entries cannot occur.
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    // No noalias dependences are passed: ndeps_noalias = 0 with a null list.
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // "Then" path: hand the task over to the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    // For untied tasks store 0 into the part_id field before enqueueing.
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Arguments for __kmpc_omp_wait_deps; same layout as DepTaskArgs without
  // the task pointer.
  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // "Else" path: wait for the dependences and call the task entry directly.
  auto &&ElseCodeGen = [&M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Both runtime functions receive TaskArgs and are attached to the region
    // through the action object.
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  // With an 'if' clause both paths are emitted; otherwise only the "then"
  // path is emitted.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5190 
5191 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5192                                        const OMPLoopDirective &D,
5193                                        llvm::Function *TaskFunction,
5194                                        QualType SharedsTy, Address Shareds,
5195                                        const Expr *IfCond,
5196                                        const OMPTaskDataTy &Data) {
5197   if (!CGF.HaveInsertPoint())
5198     return;
5199   TaskResultTy Result =
5200       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5201   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5202   // libcall.
5203   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5204   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5205   // sched, kmp_uint64 grainsize, void *task_dup);
5206   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5207   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5208   llvm::Value *IfVal;
5209   if (IfCond) {
5210     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5211                                       /*isSigned=*/true);
5212   } else {
5213     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5214   }
5215 
5216   LValue LBLVal = CGF.EmitLValueForField(
5217       Result.TDBase,
5218       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5219   const auto *LBVar =
5220       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5221   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5222                        LBLVal.getQuals(),
5223                        /*IsInitializer=*/true);
5224   LValue UBLVal = CGF.EmitLValueForField(
5225       Result.TDBase,
5226       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5227   const auto *UBVar =
5228       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5229   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5230                        UBLVal.getQuals(),
5231                        /*IsInitializer=*/true);
5232   LValue StLVal = CGF.EmitLValueForField(
5233       Result.TDBase,
5234       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5235   const auto *StVar =
5236       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5237   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5238                        StLVal.getQuals(),
5239                        /*IsInitializer=*/true);
5240   // Store reductions address.
5241   LValue RedLVal = CGF.EmitLValueForField(
5242       Result.TDBase,
5243       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5244   if (Data.Reductions) {
5245     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5246   } else {
5247     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5248                                CGF.getContext().VoidPtrTy);
5249   }
5250   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5251   llvm::Value *TaskArgs[] = {
5252       UpLoc,
5253       ThreadID,
5254       Result.NewTask,
5255       IfVal,
5256       LBLVal.getPointer(CGF),
5257       UBLVal.getPointer(CGF),
5258       CGF.EmitLoadOfScalar(StLVal, Loc),
5259       llvm::ConstantInt::getSigned(
5260           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5261       llvm::ConstantInt::getSigned(
5262           CGF.IntTy, Data.Schedule.getPointer()
5263                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5264                          : NoSchedule),
5265       Data.Schedule.getPointer()
5266           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5267                                       /*isSigned=*/false)
5268           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5269       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5270                              Result.TaskDupFn, CGF.VoidPtrTy)
5271                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5272   CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
5273                           CGM.getModule(), OMPRTL___kmpc_taskloop),
5274                       TaskArgs);
5275 }
5276 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param CGF Function being emitted.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation; while
/// \p RedOpGen runs it is remapped to the current element of the LHS array.
/// \param RHSVar Variable on the right side of the reduction operation; while
/// \p RedOpGen runs it is remapped to the current element of the RHS array.
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr Forwarded unchanged to \p RedOpGen.
/// \param EExpr Forwarded unchanged to \p RedOpGen.
/// \param UpExpr Forwarded unchanged to \p RedOpGen.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform the reduction element by element.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // LHSEnd = LHSBegin + NumElements, i.e. one element past the end.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop: the emptiness test below
  // skips the body entirely when there are no elements.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs for the current RHS and LHS elements: the array start on entry, the
  // next element on the back edge (added after the body is emitted).
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the reduction operation with LHSVar/RHSVar remapped to the current
  // elements for the duration of the generator call.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back edge originates from whatever block the body ended in.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5356 
5357 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5358 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5359 /// UDR combiner function.
5360 static void emitReductionCombiner(CodeGenFunction &CGF,
5361                                   const Expr *ReductionOp) {
5362   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5363     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5364       if (const auto *DRE =
5365               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5366         if (const auto *DRD =
5367                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5368           std::pair<llvm::Function *, llvm::Function *> Reduction =
5369               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5370           RValue Func = RValue::get(Reduction.first);
5371           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5372           CGF.EmitIgnoredExpr(ReductionOp);
5373           return;
5374         }
5375   CGF.EmitIgnoredExpr(ReductionOp);
5376 }
5377 
/// Emits the outlined reduction helper
/// \code
/// void reduction_func(void *LHSArg, void *RHSArg);
/// \endcode
/// Both arguments are cast to \p ArgsType and treated as arrays of pointers.
/// For every entry of \p ReductionOps the LHS/RHS helper variables are
/// remapped to the corresponding array slots and the combiner is emitted
/// (element by element if the private copy has an array type).
/// \param Loc Location used for the generated function and its parameters.
/// \param ArgsType LLVM type both incoming 'void *' arguments are cast to.
/// \param Privates Private copies of the reduction items; only their types are
/// inspected here.
/// \param LHSExprs References to the LHS helper variables; each must be a
/// DeclRefExpr naming a VarDecl.
/// \param RHSExprs References to the RHS helper variables; each must be a
/// DeclRefExpr naming a VarDecl.
/// \param ReductionOps Combiner expressions, one per reduction item.
/// \returns The newly created function (internal linkage).
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  // I counts reduction items while Idx counts slots of the pointer arrays. The
  // two diverge because every variably modified item takes one extra slot.
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    // Remap the RHS helper variable to slot Idx of the RHS array.
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    // Remap the LHS helper variable to slot Idx of the LHS array.
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The slot following the item holds the size value stored as a pointer;
      // it is read from the LHS array, converted back to size_t and bound to
      // the opaque size expression of the VLA while the type is emitted.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Second pass: with all helper variables remapped, emit the combiners in
  // order. The three iterators advance in lock-step with ReductionOps.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5469 
5470 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5471                                                   const Expr *ReductionOp,
5472                                                   const Expr *PrivateRef,
5473                                                   const DeclRefExpr *LHS,
5474                                                   const DeclRefExpr *RHS) {
5475   if (PrivateRef->getType()->isArrayType()) {
5476     // Emit reduction for array section.
5477     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5478     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5479     EmitOMPAggregateReduction(
5480         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5481         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5482           emitReductionCombiner(CGF, ReductionOp);
5483         });
5484   } else {
5485     // Emit reduction for array subscript or single variable.
5486     emitReductionCombiner(CGF, ReductionOp);
5487   }
5488 }
5489 
/// Emits the code that combines the private reduction copies into the
/// reduction items. If Options.SimpleReduction is set, only the plain sequence
/// of combiners is emitted. Otherwise a call to __kmpc_reduce{_nowait} is
/// emitted and its result is dispatched through a switch: case 1 emits the
/// combiners directly, case 2 emits them as atomic updates or, when the
/// combiner does not have the 'x = <expr>' form, inside a critical region.
/// Nothing is emitted if \p CGF has no insert point.
/// \param CGF Function the code is emitted into.
/// \param Loc Location used for the runtime calls and generated helpers.
/// \param Privates Private copies of the reduction items.
/// \param LHSExprs References to the LHS helper variables (DeclRefExprs).
/// \param RHSExprs References to the RHS helper variables (DeclRefExprs).
/// \param ReductionOps Combiner expressions, one per reduction item.
/// \param Options Provides the WithNowait and SimpleReduction flags.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime involvement: just emit every combiner in order.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the current slot of RedList; it runs ahead of I whenever a variably
  // modified item consumed an additional slot for its size.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The element count is converted to size_t and stored in the next slot
      // as a pointer value.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  // Note that <n> is the number of reduction items (RHSExprs.size()), not the
  // number of RedList slots computed above.
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Body of case 1: the non-atomic combiners for all reduction items.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // Decompose a combiner of the form 'x = <update>' into the updated
      // lvalue (XExpr), the whole update expression (UpExpr) and, if the
      // update is a binary operation, its right operand (EExpr).
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // BO keeps the value BO_Comma unless a binary operator is found in the
      // update expression below.
      BinaryOperatorKind BO = BO_Comma;
      // Note: inside this 'if' the name BO refers to the BinaryOperator node
      // and shadows the BinaryOperatorKind variable declared above.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        // Emits one atomic update of XExpr. The callback passed to
        // EmitOMPAtomicSimpleUpdateExpr stores the given value of X into a
        // temporary, rebinds the LHS helper variable VD to that temporary and
        // evaluates UpExpr on top of it.
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        // The combiner is not a plain assignment, so it is wrapped in a
        // critical region named via getName({"atomic_reduction"}).
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  // Only the blocking variant ends case 2 with __kmpc_end_reduce. The region
  // is invoked inside the branch so that Action is still in scope while the
  // code is generated (presumably setAction only keeps a reference to it).
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5796 
5797 /// Generates unique name for artificial threadprivate variables.
5798 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5799 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5800                                       const Expr *Ref) {
5801   SmallString<256> Buffer;
5802   llvm::raw_svector_ostream Out(Buffer);
5803   const clang::DeclRefExpr *DE;
5804   const VarDecl *D = ::getBaseDecl(Ref, DE);
5805   if (!D)
5806     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5807   D = D->getCanonicalDecl();
5808   std::string Name = CGM.getOpenMPRuntime().getName(
5809       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5810   Out << Prefix << Name << "_"
5811       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5812   return std::string(Out.str());
5813 }
5814 
5815 /// Emits reduction initializer function:
5816 /// \code
5817 /// void @.red_init(void* %arg, void* %orig) {
5818 /// %0 = bitcast void* %arg to <type>*
5819 /// store <type> <init>, <type>* %0
5820 /// ret void
5821 /// }
5822 /// \endcode
5823 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5824                                            SourceLocation Loc,
5825                                            ReductionCodeGen &RCG, unsigned N) {
5826   ASTContext &C = CGM.getContext();
5827   QualType VoidPtrTy = C.VoidPtrTy;
5828   VoidPtrTy.addRestrict();
5829   FunctionArgList Args;
5830   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5831                           ImplicitParamDecl::Other);
5832   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5833                               ImplicitParamDecl::Other);
5834   Args.emplace_back(&Param);
5835   Args.emplace_back(&ParamOrig);
5836   const auto &FnInfo =
5837       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5838   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5839   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5840   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5841                                     Name, &CGM.getModule());
5842   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5843   Fn->setDoesNotRecurse();
5844   CodeGenFunction CGF(CGM);
5845   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5846   Address PrivateAddr = CGF.EmitLoadOfPointer(
5847       CGF.GetAddrOfLocalVar(&Param),
5848       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5849   llvm::Value *Size = nullptr;
5850   // If the size of the reduction item is non-constant, load it from global
5851   // threadprivate variable.
5852   if (RCG.getSizes(N).second) {
5853     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5854         CGF, CGM.getContext().getSizeType(),
5855         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5856     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5857                                 CGM.getContext().getSizeType(), Loc);
5858   }
5859   RCG.emitAggregateType(CGF, N, Size);
5860   LValue OrigLVal;
5861   // If initializer uses initializer from declare reduction construct, emit a
5862   // pointer to the address of the original reduction item (reuired by reduction
5863   // initializer)
5864   if (RCG.usesReductionInitializer(N)) {
5865     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5866     SharedAddr = CGF.EmitLoadOfPointer(
5867         SharedAddr,
5868         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5869     OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5870   } else {
5871     OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5872         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5873         CGM.getContext().VoidPtrTy);
5874   }
5875   // Emit the initializer:
5876   // %0 = bitcast void* %arg to <type>*
5877   // store <type> <init>, <type>* %0
5878   RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5879                          [](CodeGenFunction &) { return false; });
5880   CGF.FinishFunction();
5881   return Fn;
5882 }
5883 
5884 /// Emits reduction combiner function:
5885 /// \code
5886 /// void @.red_comb(void* %arg0, void* %arg1) {
5887 /// %lhs = bitcast void* %arg0 to <type>*
5888 /// %rhs = bitcast void* %arg1 to <type>*
5889 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5890 /// store <type> %2, <type>* %lhs
5891 /// ret void
5892 /// }
5893 /// \endcode
5894 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5895                                            SourceLocation Loc,
5896                                            ReductionCodeGen &RCG, unsigned N,
5897                                            const Expr *ReductionOp,
5898                                            const Expr *LHS, const Expr *RHS,
5899                                            const Expr *PrivateRef) {
5900   ASTContext &C = CGM.getContext();
5901   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5902   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5903   FunctionArgList Args;
5904   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5905                                C.VoidPtrTy, ImplicitParamDecl::Other);
5906   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5907                             ImplicitParamDecl::Other);
5908   Args.emplace_back(&ParamInOut);
5909   Args.emplace_back(&ParamIn);
5910   const auto &FnInfo =
5911       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5912   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5913   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5914   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5915                                     Name, &CGM.getModule());
5916   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5917   Fn->setDoesNotRecurse();
5918   CodeGenFunction CGF(CGM);
5919   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5920   llvm::Value *Size = nullptr;
5921   // If the size of the reduction item is non-constant, load it from global
5922   // threadprivate variable.
5923   if (RCG.getSizes(N).second) {
5924     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5925         CGF, CGM.getContext().getSizeType(),
5926         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5927     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5928                                 CGM.getContext().getSizeType(), Loc);
5929   }
5930   RCG.emitAggregateType(CGF, N, Size);
5931   // Remap lhs and rhs variables to the addresses of the function arguments.
5932   // %lhs = bitcast void* %arg0 to <type>*
5933   // %rhs = bitcast void* %arg1 to <type>*
5934   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5935   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
5936     // Pull out the pointer to the variable.
5937     Address PtrAddr = CGF.EmitLoadOfPointer(
5938         CGF.GetAddrOfLocalVar(&ParamInOut),
5939         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5940     return CGF.Builder.CreateElementBitCast(
5941         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5942   });
5943   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
5944     // Pull out the pointer to the variable.
5945     Address PtrAddr = CGF.EmitLoadOfPointer(
5946         CGF.GetAddrOfLocalVar(&ParamIn),
5947         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5948     return CGF.Builder.CreateElementBitCast(
5949         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
5950   });
5951   PrivateScope.Privatize();
5952   // Emit the combiner body:
5953   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5954   // store <type> %2, <type>* %lhs
5955   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5956       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5957       cast<DeclRefExpr>(RHS));
5958   CGF.FinishFunction();
5959   return Fn;
5960 }
5961 
5962 /// Emits reduction finalizer function:
5963 /// \code
5964 /// void @.red_fini(void* %arg) {
5965 /// %0 = bitcast void* %arg to <type>*
5966 /// <destroy>(<type>* %0)
5967 /// ret void
5968 /// }
5969 /// \endcode
5970 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5971                                            SourceLocation Loc,
5972                                            ReductionCodeGen &RCG, unsigned N) {
5973   if (!RCG.needCleanups(N))
5974     return nullptr;
5975   ASTContext &C = CGM.getContext();
5976   FunctionArgList Args;
5977   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5978                           ImplicitParamDecl::Other);
5979   Args.emplace_back(&Param);
5980   const auto &FnInfo =
5981       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5982   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5983   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5984   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5985                                     Name, &CGM.getModule());
5986   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5987   Fn->setDoesNotRecurse();
5988   CodeGenFunction CGF(CGM);
5989   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5990   Address PrivateAddr = CGF.EmitLoadOfPointer(
5991       CGF.GetAddrOfLocalVar(&Param),
5992       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5993   llvm::Value *Size = nullptr;
5994   // If the size of the reduction item is non-constant, load it from global
5995   // threadprivate variable.
5996   if (RCG.getSizes(N).second) {
5997     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5998         CGF, CGM.getContext().getSizeType(),
5999         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6000     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6001                                 CGM.getContext().getSizeType(), Loc);
6002   }
6003   RCG.emitAggregateType(CGF, N, Size);
6004   // Emit the finalizer body:
6005   // <destroy>(<type>* %0)
6006   RCG.emitCleanups(CGF, N, PrivateAddr);
6007   CGF.FinishFunction(Loc);
6008   return Fn;
6009 }
6010 
/// Emits the setup code for task reductions: fills a local array of
/// kmp_taskred_input_t descriptors (one per entry of Data.ReductionVars) and
/// passes it to the runtime.
///
/// \param CGF Function in which the code is emitted.
/// \param Loc Location used for the generated helpers and runtime calls.
/// \param LHSExprs Per-item LHS expressions forwarded to the combiner helper.
/// \param RHSExprs Per-item RHS expressions forwarded to the combiner helper.
/// \param Data Task data holding the reduction variables, origs, copies and
/// ops, plus the task-modifier / worksharing flags.
/// \returns The result of the __kmpc_taskred_modifier_init call when
/// Data.IsReductionWithTaskMod is set, otherwise the result of
/// __kmpc_taskred_init; nullptr if there is no insert point or there are no
/// reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  // Fields are added in the same order as in the struct sketched above.
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  // The flags field is modelled as an unsigned 32-bit integer.
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = <size in chars, converted to size_t>;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // emitReduceFiniFunction returns nullptr when the item needs no cleanups;
    // a null pointer is stored in that case.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6139 
6140 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6141                                             SourceLocation Loc,
6142                                             bool IsWorksharingReduction) {
6143   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6144   // is_ws, int num, void *data);
6145   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6146   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6147                                                 CGM.IntTy, /*isSigned=*/true);
6148   llvm::Value *Args[] = {IdentTLoc, GTid,
6149                          llvm::ConstantInt::get(CGM.IntTy,
6150                                                 IsWorksharingReduction ? 1 : 0,
6151                                                 /*isSigned=*/true)};
6152   (void)CGF.EmitRuntimeCall(
6153       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
6154           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6155       Args);
6156 }
6157 
6158 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6159                                               SourceLocation Loc,
6160                                               ReductionCodeGen &RCG,
6161                                               unsigned N) {
6162   auto Sizes = RCG.getSizes(N);
6163   // Emit threadprivate global variable if the type is non-constant
6164   // (Sizes.second = nullptr).
6165   if (Sizes.second) {
6166     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6167                                                      /*isSigned=*/false);
6168     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6169         CGF, CGM.getContext().getSizeType(),
6170         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6171     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6172   }
6173 }
6174 
6175 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6176                                               SourceLocation Loc,
6177                                               llvm::Value *ReductionsPtr,
6178                                               LValue SharedLVal) {
6179   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6180   // *d);
6181   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6182                                                    CGM.IntTy,
6183                                                    /*isSigned=*/true),
6184                          ReductionsPtr,
6185                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6186                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6187   return Address(
6188       CGF.EmitRuntimeCall(
6189           llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
6190               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6191           Args),
6192       SharedLVal.getAlignment());
6193 }
6194 
6195 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6196                                        SourceLocation Loc) {
6197   if (!CGF.HaveInsertPoint())
6198     return;
6199 
6200   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
6201   if (OMPBuilder) {
6202     OMPBuilder->CreateTaskwait(CGF.Builder);
6203   } else {
6204     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6205     // global_tid);
6206     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6207     // Ignore return result until untied tasks are supported.
6208     CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
6209                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6210                         Args);
6211   }
6212 
6213   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6214     Region->emitUntiedSwitch(CGF);
6215 }
6216 
6217 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6218                                            OpenMPDirectiveKind InnerKind,
6219                                            const RegionCodeGenTy &CodeGen,
6220                                            bool HasCancel) {
6221   if (!CGF.HaveInsertPoint())
6222     return;
6223   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6224   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6225 }
6226 
namespace {
/// Cancellation kinds, one per construct kind that can be cancelled. The
/// numeric value is what this file passes as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls.
enum RTCancelKind {
  /// No cancellation kind requested.
  CancelNoreq = 0,
  /// Cancellation of a 'parallel' region.
  CancelParallel = 1,
  /// Cancellation of a 'for' (loop) region.
  CancelLoop = 2,
  /// Cancellation of a 'sections' region.
  CancelSections = 3,
  /// Cancellation of a 'taskgroup' region.
  CancelTaskgroup = 4
};
} // anonymous namespace
6236 
6237 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6238   RTCancelKind CancelKind = CancelNoreq;
6239   if (CancelRegion == OMPD_parallel)
6240     CancelKind = CancelParallel;
6241   else if (CancelRegion == OMPD_for)
6242     CancelKind = CancelLoop;
6243   else if (CancelRegion == OMPD_sections)
6244     CancelKind = CancelSections;
6245   else {
6246     assert(CancelRegion == OMPD_taskgroup);
6247     CancelKind = CancelTaskgroup;
6248   }
6249   return CancelKind;
6250 }
6251 
6252 void CGOpenMPRuntime::emitCancellationPointCall(
6253     CodeGenFunction &CGF, SourceLocation Loc,
6254     OpenMPDirectiveKind CancelRegion) {
6255   if (!CGF.HaveInsertPoint())
6256     return;
6257   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6258   // global_tid, kmp_int32 cncl_kind);
6259   if (auto *OMPRegionInfo =
6260           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6261     // For 'cancellation point taskgroup', the task region info may not have a
6262     // cancel. This may instead happen in another adjacent task.
6263     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6264       llvm::Value *Args[] = {
6265           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6266           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6267       // Ignore return result until untied tasks are supported.
6268       llvm::Value *Result = CGF.EmitRuntimeCall(
6269           llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
6270               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6271           Args);
6272       // if (__kmpc_cancellationpoint()) {
6273       //   exit from construct;
6274       // }
6275       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6276       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6277       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6278       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6279       CGF.EmitBlock(ExitBB);
6280       // exit from construct;
6281       CodeGenFunction::JumpDest CancelDest =
6282           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6283       CGF.EmitBranchThroughCleanup(CancelDest);
6284       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6285     }
6286   }
6287 }
6288 
6289 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6290                                      const Expr *IfCond,
6291                                      OpenMPDirectiveKind CancelRegion) {
6292   if (!CGF.HaveInsertPoint())
6293     return;
6294   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6295   // kmp_int32 cncl_kind);
6296   auto &M = CGM.getModule();
6297   if (auto *OMPRegionInfo =
6298           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6299     auto &&ThenGen = [&M, Loc, CancelRegion,
6300                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6301       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6302       llvm::Value *Args[] = {
6303           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6304           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6305       // Ignore return result until untied tasks are supported.
6306       llvm::Value *Result =
6307           CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
6308                                   M, OMPRTL___kmpc_cancel),
6309                               Args);
6310       // if (__kmpc_cancel()) {
6311       //   exit from construct;
6312       // }
6313       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6314       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6315       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6316       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6317       CGF.EmitBlock(ExitBB);
6318       // exit from construct;
6319       CodeGenFunction::JumpDest CancelDest =
6320           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6321       CGF.EmitBranchThroughCleanup(CancelDest);
6322       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6323     };
6324     if (IfCond) {
6325       emitIfClause(CGF, IfCond, ThenGen,
6326                    [](CodeGenFunction &, PrePostActionTy &) {});
6327     } else {
6328       RegionCodeGenTy ThenRCG(ThenGen);
6329       ThenRCG(CGF);
6330     }
6331   }
6332 }
6333 
6334 namespace {
6335 /// Cleanup action for uses_allocators support.
6336 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6337   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6338 
6339 public:
6340   OMPUsesAllocatorsActionTy(
6341       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6342       : Allocators(Allocators) {}
6343   void Enter(CodeGenFunction &CGF) override {
6344     if (!CGF.HaveInsertPoint())
6345       return;
6346     for (const auto &AllocatorData : Allocators) {
6347       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6348           CGF, AllocatorData.first, AllocatorData.second);
6349     }
6350   }
6351   void Exit(CodeGenFunction &CGF) override {
6352     if (!CGF.HaveInsertPoint())
6353       return;
6354     for (const auto &AllocatorData : Allocators) {
6355       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6356                                                         AllocatorData.first);
6357     }
6358   }
6359 };
6360 } // namespace
6361 
6362 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6363     const OMPExecutableDirective &D, StringRef ParentName,
6364     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6365     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6366   assert(!ParentName.empty() && "Invalid target region parent name!");
6367   HasEmittedTargetRegion = true;
6368   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6369   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6370     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6371       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6372       if (!D.AllocatorTraits)
6373         continue;
6374       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6375     }
6376   }
6377   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6378   CodeGen.setAction(UsesAllocatorAction);
6379   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6380                                    IsOffloadEntry, CodeGen);
6381 }
6382 
6383 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6384                                              const Expr *Allocator,
6385                                              const Expr *AllocatorTraits) {
6386   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6387   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6388   // Use default memspace handle.
6389   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6390   llvm::Value *NumTraits = llvm::ConstantInt::get(
6391       CGF.IntTy, cast<ConstantArrayType>(
6392                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6393                      ->getSize()
6394                      .getLimitedValue());
6395   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6396   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6397       AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
6398   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6399                                            AllocatorTraitsLVal.getBaseInfo(),
6400                                            AllocatorTraitsLVal.getTBAAInfo());
6401   llvm::Value *Traits =
6402       CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6403 
6404   llvm::Value *AllocatorVal =
6405       CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
6406                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
6407                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
6408   // Store to allocator.
6409   CGF.EmitVarDecl(*cast<VarDecl>(
6410       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6411   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6412   AllocatorVal =
6413       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6414                                Allocator->getType(), Allocator->getExprLoc());
6415   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6416 }
6417 
6418 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6419                                              const Expr *Allocator) {
6420   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6421   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6422   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6423   llvm::Value *AllocatorVal =
6424       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6425   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6426                                           CGF.getContext().VoidPtrTy,
6427                                           Allocator->getExprLoc());
6428   (void)CGF.EmitRuntimeCall(
6429       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
6430           CGM.getModule(), OMPRTL___kmpc_destroy_allocator),
6431       {ThreadId, AllocatorVal});
6432 }
6433 
/// Outlines the captured statement of the target directive \p D into a
/// function with a unique entry name and, if requested, registers it as an
/// offload entry.
///
/// \param D Target directive whose 'target' captured statement is outlined.
/// \param ParentName Name of the enclosing function; it becomes part of the
/// entry name.
/// \param OutlinedFn [out] The generated outlined function.
/// \param OutlinedFnID [out] The region ID; only set when \p IsOffloadEntry
/// is true.
/// \param IsOffloadEntry Whether the region must be registered as an offload
/// entry.
/// \param CodeGen Code generation callback for the region body.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    // The device and file IDs are printed in hexadecimal, the line in decimal.
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the region in a fresh CodeGenFunction, with the target region info
  // installed as the captured statement info while the function is generated.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also give
  // the outlined function weak, non-DSO-local linkage in case we are emitting
  // code for the device, because these functions will be entry points to the
  // device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // On the host the ID is a dedicated constant i8 global, zero-initialized
    // and with weak linkage, named after the entry function.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6500 
6501 /// Checks if the expression is constant or does not have non-trivial function
6502 /// calls.
6503 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6504   // We can skip constant expressions.
6505   // We can skip expressions with trivial calls or simple expressions.
6506   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6507           !E->hasNonTrivialCall(Ctx)) &&
6508          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6509 }
6510 
/// Looks through containers and compound statements of \p Body for the single
/// statement that matters, ignoring trivial expressions, a fixed set of
/// ignorable statements and declarations that need no code.
///
/// \returns The only significant child statement; nullptr if a compound
/// statement has more than one significant child or none at all.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Keep descending while the current candidate is itself a compound
  // statement.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Trivial expressions are skipped.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      // A declaration statement is skipped only if every declaration in it is
      // ignorable.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              // Any other declaration must be a variable to be ignorable.
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Variables are ignorable when constexpr, or when of trivial or
              // reference type with no initializer or a trivial one.
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Strip containers from the single child before the next iteration.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6555 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit the constant 1 for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Emit the constant 0 when a teams construct is present but carries no
/// num_teams clause (presumably leaving the choice to the runtime - TODO
/// confirm against the runtime interface).
///
/// Otherwise, i.e. for a plain 'target' whose body has no single nested
/// OpenMP directive, return nullptr.
///
/// \param CGF Host-side function in which the value is emitted.
/// \param D A target-based executable directive (asserted).
/// \returns A 32-bit integer value with the number of teams, or nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look for a single directive nested in the region body.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Evaluate the nested num_teams expression with the captured
          // statement info of the target region installed.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams directive without a num_teams clause.
        return Bld.getInt32(0);
      }
      // A nested parallel or simd directive yields exactly one team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      // Any other nested directive.
      return Bld.getInt32(0);
    }
    // No single nested directive was found.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target teams directives: use the directive's own num_teams
    // clause if present; cleanups of the expression run when the scope ends.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  // Target directives without a teams construct yield one team.
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // All remaining directive kinds fall out of the switch into the
  // llvm_unreachable below.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6688 
6689 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6690                                   llvm::Value *DefaultThreadLimitVal) {
6691   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6692       CGF.getContext(), CS->getCapturedStmt());
6693   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6694     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6695       llvm::Value *NumThreads = nullptr;
6696       llvm::Value *CondVal = nullptr;
6697       // Handle if clause. If if clause present, the number of threads is
6698       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6699       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6700         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6701         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6702         const OMPIfClause *IfClause = nullptr;
6703         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6704           if (C->getNameModifier() == OMPD_unknown ||
6705               C->getNameModifier() == OMPD_parallel) {
6706             IfClause = C;
6707             break;
6708           }
6709         }
6710         if (IfClause) {
6711           const Expr *Cond = IfClause->getCondition();
6712           bool Result;
6713           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6714             if (!Result)
6715               return CGF.Builder.getInt32(1);
6716           } else {
6717             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6718             if (const auto *PreInit =
6719                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6720               for (const auto *I : PreInit->decls()) {
6721                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6722                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6723                 } else {
6724                   CodeGenFunction::AutoVarEmission Emission =
6725                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6726                   CGF.EmitAutoVarCleanups(Emission);
6727                 }
6728               }
6729             }
6730             CondVal = CGF.EvaluateExprAsBool(Cond);
6731           }
6732         }
6733       }
6734       // Check the value of num_threads clause iff if clause was not specified
6735       // or is not evaluated to false.
6736       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6737         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6738         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6739         const auto *NumThreadsClause =
6740             Dir->getSingleClause<OMPNumThreadsClause>();
6741         CodeGenFunction::LexicalScope Scope(
6742             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6743         if (const auto *PreInit =
6744                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6745           for (const auto *I : PreInit->decls()) {
6746             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6747               CGF.EmitVarDecl(cast<VarDecl>(*I));
6748             } else {
6749               CodeGenFunction::AutoVarEmission Emission =
6750                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6751               CGF.EmitAutoVarCleanups(Emission);
6752             }
6753           }
6754         }
6755         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6756         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6757                                                /*isSigned=*/false);
6758         if (DefaultThreadLimitVal)
6759           NumThreads = CGF.Builder.CreateSelect(
6760               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6761               DefaultThreadLimitVal, NumThreads);
6762       } else {
6763         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6764                                            : CGF.Builder.getInt32(0);
6765       }
6766       // Process condition of the if clause.
6767       if (CondVal) {
6768         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6769                                               CGF.Builder.getInt32(1));
6770       }
6771       return NumThreads;
6772     }
6773     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6774       return CGF.Builder.getInt32(1);
6775     return DefaultThreadLimitVal;
6776   }
6777   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6778                                : CGF.Builder.getInt32(0);
6779 }
6780 
6781 /// Emit the number of threads for a target directive.  Inspect the
6782 /// thread_limit clause associated with a teams construct combined or closely
6783 /// nested with the target directive.
6784 ///
6785 /// Emit the num_threads clause for directives such as 'target parallel' that
6786 /// have no associated teams construct.
6787 ///
6788 /// Otherwise, return nullptr.
6789 static llvm::Value *
6790 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
6791                                  const OMPExecutableDirective &D) {
6792   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6793          "Clauses associated with the teams directive expected to be emitted "
6794          "only for the host!");
6795   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6796   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6797          "Expected target-based executable directive.");
6798   CGBuilderTy &Bld = CGF.Builder;
6799   llvm::Value *ThreadLimitVal = nullptr;
6800   llvm::Value *NumThreadsVal = nullptr;
6801   switch (DirectiveKind) {
6802   case OMPD_target: {
6803     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6804     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6805       return NumThreads;
6806     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6807         CGF.getContext(), CS->getCapturedStmt());
6808     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6809       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6810         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6811         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6812         const auto *ThreadLimitClause =
6813             Dir->getSingleClause<OMPThreadLimitClause>();
6814         CodeGenFunction::LexicalScope Scope(
6815             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6816         if (const auto *PreInit =
6817                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6818           for (const auto *I : PreInit->decls()) {
6819             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6820               CGF.EmitVarDecl(cast<VarDecl>(*I));
6821             } else {
6822               CodeGenFunction::AutoVarEmission Emission =
6823                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6824               CGF.EmitAutoVarCleanups(Emission);
6825             }
6826           }
6827         }
6828         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6829             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6830         ThreadLimitVal =
6831             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6832       }
6833       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6834           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6835         CS = Dir->getInnermostCapturedStmt();
6836         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6837             CGF.getContext(), CS->getCapturedStmt());
6838         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6839       }
6840       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6841           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6842         CS = Dir->getInnermostCapturedStmt();
6843         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6844           return NumThreads;
6845       }
6846       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6847         return Bld.getInt32(1);
6848     }
6849     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6850   }
6851   case OMPD_target_teams: {
6852     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6853       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6854       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6855       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6856           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6857       ThreadLimitVal =
6858           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6859     }
6860     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6861     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6862       return NumThreads;
6863     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6864         CGF.getContext(), CS->getCapturedStmt());
6865     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6866       if (Dir->getDirectiveKind() == OMPD_distribute) {
6867         CS = Dir->getInnermostCapturedStmt();
6868         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6869           return NumThreads;
6870       }
6871     }
6872     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6873   }
6874   case OMPD_target_teams_distribute:
6875     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6876       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6877       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6878       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6879           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6880       ThreadLimitVal =
6881           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6882     }
6883     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6884   case OMPD_target_parallel:
6885   case OMPD_target_parallel_for:
6886   case OMPD_target_parallel_for_simd:
6887   case OMPD_target_teams_distribute_parallel_for:
6888   case OMPD_target_teams_distribute_parallel_for_simd: {
6889     llvm::Value *CondVal = nullptr;
6890     // Handle if clause. If if clause present, the number of threads is
6891     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6892     if (D.hasClausesOfKind<OMPIfClause>()) {
6893       const OMPIfClause *IfClause = nullptr;
6894       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6895         if (C->getNameModifier() == OMPD_unknown ||
6896             C->getNameModifier() == OMPD_parallel) {
6897           IfClause = C;
6898           break;
6899         }
6900       }
6901       if (IfClause) {
6902         const Expr *Cond = IfClause->getCondition();
6903         bool Result;
6904         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6905           if (!Result)
6906             return Bld.getInt32(1);
6907         } else {
6908           CodeGenFunction::RunCleanupsScope Scope(CGF);
6909           CondVal = CGF.EvaluateExprAsBool(Cond);
6910         }
6911       }
6912     }
6913     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6914       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6915       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6916       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6917           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6918       ThreadLimitVal =
6919           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6920     }
6921     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6922       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6923       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6924       llvm::Value *NumThreads = CGF.EmitScalarExpr(
6925           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6926       NumThreadsVal =
6927           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
6928       ThreadLimitVal = ThreadLimitVal
6929                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6930                                                                 ThreadLimitVal),
6931                                               NumThreadsVal, ThreadLimitVal)
6932                            : NumThreadsVal;
6933     }
6934     if (!ThreadLimitVal)
6935       ThreadLimitVal = Bld.getInt32(0);
6936     if (CondVal)
6937       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6938     return ThreadLimitVal;
6939   }
6940   case OMPD_target_teams_distribute_simd:
6941   case OMPD_target_simd:
6942     return Bld.getInt32(1);
6943   case OMPD_parallel:
6944   case OMPD_for:
6945   case OMPD_parallel_for:
6946   case OMPD_parallel_master:
6947   case OMPD_parallel_sections:
6948   case OMPD_for_simd:
6949   case OMPD_parallel_for_simd:
6950   case OMPD_cancel:
6951   case OMPD_cancellation_point:
6952   case OMPD_ordered:
6953   case OMPD_threadprivate:
6954   case OMPD_allocate:
6955   case OMPD_task:
6956   case OMPD_simd:
6957   case OMPD_sections:
6958   case OMPD_section:
6959   case OMPD_single:
6960   case OMPD_master:
6961   case OMPD_critical:
6962   case OMPD_taskyield:
6963   case OMPD_barrier:
6964   case OMPD_taskwait:
6965   case OMPD_taskgroup:
6966   case OMPD_atomic:
6967   case OMPD_flush:
6968   case OMPD_depobj:
6969   case OMPD_scan:
6970   case OMPD_teams:
6971   case OMPD_target_data:
6972   case OMPD_target_exit_data:
6973   case OMPD_target_enter_data:
6974   case OMPD_distribute:
6975   case OMPD_distribute_simd:
6976   case OMPD_distribute_parallel_for:
6977   case OMPD_distribute_parallel_for_simd:
6978   case OMPD_teams_distribute:
6979   case OMPD_teams_distribute_simd:
6980   case OMPD_teams_distribute_parallel_for:
6981   case OMPD_teams_distribute_parallel_for_simd:
6982   case OMPD_target_update:
6983   case OMPD_declare_simd:
6984   case OMPD_declare_variant:
6985   case OMPD_begin_declare_variant:
6986   case OMPD_end_declare_variant:
6987   case OMPD_declare_target:
6988   case OMPD_end_declare_target:
6989   case OMPD_declare_reduction:
6990   case OMPD_declare_mapper:
6991   case OMPD_taskloop:
6992   case OMPD_taskloop_simd:
6993   case OMPD_master_taskloop:
6994   case OMPD_master_taskloop_simd:
6995   case OMPD_parallel_master_taskloop:
6996   case OMPD_parallel_master_taskloop_simd:
6997   case OMPD_requires:
6998   case OMPD_unknown:
6999     break;
7000   default:
7001     break;
7002   }
7003   llvm_unreachable("Unsupported directive kind.");
7004 }
7005 
7006 namespace {
7007 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7008 
7009 // Utility to handle information from clauses associated with a given
7010 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7011 // It provides a convenient interface to obtain the information and generate
7012 // code for that information.
7013 class MappableExprsHandler {
7014 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. The enumerators are combined with bitwise operators (see
  /// LLVM_MARK_AS_BITMASK_ENUM at the end of the list).
  /// NOTE(review): presumably these values have to agree with the ones the
  /// offloading runtime library expects - confirm before changing any of them.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags (bits 48-63) indicate whether the entry is
    /// member of some struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    // Must name the largest enumerator so the bitmask operators cover all
    // bits in use.
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7055 
7056   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7057   static unsigned getFlagMemberOffset() {
7058     unsigned Offset = 0;
7059     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7060          Remain = Remain >> 1)
7061       Offset++;
7062     return Offset;
7063   }
7064 
7065   /// Class that associates information with a base pointer to be passed to the
7066   /// runtime library.
7067   class BasePointerInfo {
7068     /// The base pointer.
7069     llvm::Value *Ptr = nullptr;
7070     /// The base declaration that refers to this device pointer, or null if
7071     /// there is none.
7072     const ValueDecl *DevPtrDecl = nullptr;
7073 
7074   public:
7075     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7076         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7077     llvm::Value *operator*() const { return Ptr; }
7078     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7079     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7080   };
7081 
  /// List of base pointers, each optionally tied to a device pointer
  /// declaration (see BasePointerInfo).
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// List of plain values; generateInfoForComponentList uses this type for
  /// both its section-pointer and its size output lists.
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// List of map-type flag words (OpenMPOffloadMappingFlags bit sets).
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7085 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Lowest mapped element: its field index and address. Starts out as
    /// field index 0 with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element: its field index and address. Starts out as
    /// field index 0 with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Address of the struct itself; invalid until it is set.
    Address Base = Address::invalid();
  };
7097 
7098 private:
7099   /// Kind that defines how a device pointer has to be returned.
7100   struct MapInfo {
7101     OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7102     OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7103     ArrayRef<OpenMPMapModifierKind> MapModifiers;
7104     bool ReturnDevicePointer = false;
7105     bool IsImplicit = false;
7106     bool ForDeviceAddr = false;
7107 
7108     MapInfo() = default;
7109     MapInfo(
7110         OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7111         OpenMPMapClauseKind MapType,
7112         ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer,
7113         bool IsImplicit, bool ForDeviceAddr = false)
7114         : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7115           ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7116           ForDeviceAddr(ForDeviceAddr) {}
7117   };
7118 
7119   /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7120   /// member and there is no map information about it, then emission of that
7121   /// entry is deferred until the whole struct has been processed.
7122   struct DeferredDevicePtrEntryTy {
7123     const Expr *IE = nullptr;
7124     const ValueDecl *VD = nullptr;
7125     bool ForDeviceAddr = false;
7126 
7127     DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7128                              bool ForDeviceAddr)
7129         : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7130   };
7131 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for. All code emitted by this
  /// handler goes through it.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null. A declaration may be
  /// associated with several component lists.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7152 
7153   llvm::Value *getExprTypeSize(const Expr *E) const {
7154     QualType ExprTy = E->getType().getCanonicalType();
7155 
7156     // Calculate the size for array shaping expression.
7157     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7158       llvm::Value *Size =
7159           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7160       for (const Expr *SE : OAE->getDimensions()) {
7161         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7162         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7163                                       CGF.getContext().getSizeType(),
7164                                       SE->getExprLoc());
7165         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7166       }
7167       return Size;
7168     }
7169 
7170     // Reference types are ignored for mapping purposes.
7171     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7172       ExprTy = RefTy->getPointeeType().getCanonicalType();
7173 
7174     // Given that an array section is considered a built-in type, we need to
7175     // do the calculation based on the length of the section instead of relying
7176     // on CGF.getTypeSize(E->getType()).
7177     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7178       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7179                             OAE->getBase()->IgnoreParenImpCasts())
7180                             .getCanonicalType();
7181 
7182       // If there is no length associated with the expression and lower bound is
7183       // not specified too, that means we are using the whole length of the
7184       // base.
7185       if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
7186           !OAE->getLowerBound())
7187         return CGF.getTypeSize(BaseTy);
7188 
7189       llvm::Value *ElemSize;
7190       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7191         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7192       } else {
7193         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7194         assert(ATy && "Expecting array type if not a pointer type.");
7195         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7196       }
7197 
7198       // If we don't have a length at this point, that is because we have an
7199       // array section with a single element.
7200       if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
7201         return ElemSize;
7202 
7203       if (const Expr *LenExpr = OAE->getLength()) {
7204         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7205         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7206                                              CGF.getContext().getSizeType(),
7207                                              LenExpr->getExprLoc());
7208         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7209       }
7210       assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
7211              OAE->getLowerBound() && "expected array_section[lb:].");
7212       // Size = sizetype - lb * elemtype;
7213       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7214       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7215       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7216                                        CGF.getContext().getSizeType(),
7217                                        OAE->getLowerBound()->getExprLoc());
7218       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7219       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7220       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7221       LengthVal = CGF.Builder.CreateSelect(
7222           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7223       return LengthVal;
7224     }
7225     return CGF.getTypeSize(ExprTy);
7226   }
7227 
7228   /// Return the corresponding bits for a given map clause modifier. Add
7229   /// a flag marking the map as a pointer if requested. Add a flag marking the
7230   /// map as the first one of a series of maps that relate to the same map
7231   /// expression.
7232   OpenMPOffloadMappingFlags getMapTypeBits(
7233       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7234       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7235     OpenMPOffloadMappingFlags Bits =
7236         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7237     switch (MapType) {
7238     case OMPC_MAP_alloc:
7239     case OMPC_MAP_release:
7240       // alloc and release is the default behavior in the runtime library,  i.e.
7241       // if we don't pass any bits alloc/release that is what the runtime is
7242       // going to do. Therefore, we don't need to signal anything for these two
7243       // type modifiers.
7244       break;
7245     case OMPC_MAP_to:
7246       Bits |= OMP_MAP_TO;
7247       break;
7248     case OMPC_MAP_from:
7249       Bits |= OMP_MAP_FROM;
7250       break;
7251     case OMPC_MAP_tofrom:
7252       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7253       break;
7254     case OMPC_MAP_delete:
7255       Bits |= OMP_MAP_DELETE;
7256       break;
7257     case OMPC_MAP_unknown:
7258       llvm_unreachable("Unexpected map type!");
7259     }
7260     if (AddPtrFlag)
7261       Bits |= OMP_MAP_PTR_AND_OBJ;
7262     if (AddIsTargetParamFlag)
7263       Bits |= OMP_MAP_TARGET_PARAM;
7264     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7265         != MapModifiers.end())
7266       Bits |= OMP_MAP_ALWAYS;
7267     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7268         != MapModifiers.end())
7269       Bits |= OMP_MAP_CLOSE;
7270     return Bits;
7271   }
7272 
7273   /// Return true if the provided expression is a final array section. A
7274   /// final array section, is one whose length can't be proved to be one.
7275   bool isFinalArraySectionExpression(const Expr *E) const {
7276     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7277 
7278     // It is not an array section and therefore not a unity-size one.
7279     if (!OASE)
7280       return false;
7281 
7282     // An array section with no colon always refer to a single element.
7283     if (OASE->getColonLoc().isInvalid())
7284       return false;
7285 
7286     const Expr *Length = OASE->getLength();
7287 
7288     // If we don't have a length we have to check if the array has size 1
7289     // for this dimension. Also, we should always expect a length if the
7290     // base type is pointer.
7291     if (!Length) {
7292       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7293                              OASE->getBase()->IgnoreParenImpCasts())
7294                              .getCanonicalType();
7295       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7296         return ATy->getSize().getSExtValue() != 1;
7297       // If we don't have a constant dimension length, we have to consider
7298       // the current section as having any size, so it is not necessarily
7299       // unitary. If it happen to be unity size, that's user fault.
7300       return true;
7301     }
7302 
7303     // Check if the length evaluates to 1.
7304     Expr::EvalResult Result;
7305     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7306       return true; // Can have more that size 1.
7307 
7308     llvm::APSInt ConstLength = Result.Val.getInt();
7309     return ConstLength.getSExtValue() != 1;
7310   }
7311 
  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  ///
  /// \param MapType, MapModifiers, IsImplicit Forwarded to getMapTypeBits() to
  /// compute the flags of every emitted entry.
  /// \param Components The component list; it is scanned in reverse, i.e.
  /// from the base to the complete expression.
  /// \param BasePointers, Pointers, Sizes, Types Output arrays. One element is
  /// appended to each of them for every entry generated here.
  /// \param PartialStruct Updated with the base and the lowest/highest mapped
  /// element whenever a member expression is part of the component list.
  /// \param IsFirstComponentList Seeds the flag that requests the target
  /// parameter bit for the first generated entry.
  /// \param ForDeviceAddr If true, a member expression component is handled
  /// like a member pointer, i.e. no individual entry is emitted for it.
  /// \param OverlappedElements If not empty, the component list must consist
  /// of a final, non-pointer base element (see the asserts below); entries are
  /// then emitted only for the parts of the base that are not covered by the
  /// overlapped elements.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit, bool ForDeviceAddr = false,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    // Compute the initial base pointer BP from the first (base) component.
    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      // The base is an array subscript or an array section applied directly to
      // 'this'; use the address of that expression as the base.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      // The base is an array shaping expression applied to 'this'; the base
      // address is the value of the shaped pointer expression.
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      // Pointer-typed unary/binary operator components do not trigger an entry
      // on their own.
      bool IsNonDerefPointer = IsPointer && !UO && !BO;

      if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        // LB is the section pointer (lower bound) of the current component.
        Address LB = Address::invalid();
        if (OAShE) {
          LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                       CGF.getContext().getTypeAlignInChars(
                           OAShE->getBase()->getType()));
        } else {
          LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                   .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            (IsPointer || ForDeviceAddr) && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            // Only the first component that has an associated declaration is
            // used to locate the overlapped element.
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress(CGF);
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            // Entry for the chunk between the current position and the start
            // of the overlapped element.
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
                                                      /*isSigned=*/true));
            Types.push_back(Flags);
            // Continue right after the overlapped element.
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          // Entry for the remaining chunk after the last overlapped element up
          // to the end of the base element.
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          Types.push_back(Flags);
          // The base element is the last component (asserted above), so there
          // is nothing left to process.
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointerOrAddr) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference,
              IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LB};
            }
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }
7775 
7776   /// Return the adjusted map modifiers if the declaration a capture refers to
7777   /// appears in a first-private clause. This is expected to be used only with
7778   /// directives that start with 'target'.
7779   MappableExprsHandler::OpenMPOffloadMappingFlags
7780   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7781     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7782 
7783     // A first private variable captured by reference will use only the
7784     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7785     // declaration is known as first-private in this handler.
7786     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7787       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7788           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7789         return MappableExprsHandler::OMP_MAP_ALWAYS |
7790                MappableExprsHandler::OMP_MAP_TO;
7791       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7792         return MappableExprsHandler::OMP_MAP_TO |
7793                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7794       return MappableExprsHandler::OMP_MAP_PRIVATE |
7795              MappableExprsHandler::OMP_MAP_TO;
7796     }
7797     return MappableExprsHandler::OMP_MAP_TO |
7798            MappableExprsHandler::OMP_MAP_FROM;
7799   }
7800 
7801   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7802     // Rotate by getFlagMemberOffset() bits.
7803     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7804                                                   << getFlagMemberOffset());
7805   }
7806 
7807   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7808                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7809     // If the entry is PTR_AND_OBJ but has not been marked with the special
7810     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7811     // marked as MEMBER_OF.
7812     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7813         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7814       return;
7815 
7816     // Reset the placeholder value to prepare the flag for the assignment of the
7817     // proper MEMBER_OF value.
7818     Flags &= ~OMP_MAP_MEMBER_OF;
7819     Flags |= MemberOfFlag;
7820   }
7821 
7822   void getPlainLayout(const CXXRecordDecl *RD,
7823                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7824                       bool AsBase) const {
7825     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7826 
7827     llvm::StructType *St =
7828         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7829 
7830     unsigned NumElements = St->getNumElements();
7831     llvm::SmallVector<
7832         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7833         RecordLayout(NumElements);
7834 
7835     // Fill bases.
7836     for (const auto &I : RD->bases()) {
7837       if (I.isVirtual())
7838         continue;
7839       const auto *Base = I.getType()->getAsCXXRecordDecl();
7840       // Ignore empty bases.
7841       if (Base->isEmpty() || CGF.getContext()
7842                                  .getASTRecordLayout(Base)
7843                                  .getNonVirtualSize()
7844                                  .isZero())
7845         continue;
7846 
7847       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7848       RecordLayout[FieldIndex] = Base;
7849     }
7850     // Fill in virtual bases.
7851     for (const auto &I : RD->vbases()) {
7852       const auto *Base = I.getType()->getAsCXXRecordDecl();
7853       // Ignore empty bases.
7854       if (Base->isEmpty())
7855         continue;
7856       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7857       if (RecordLayout[FieldIndex])
7858         continue;
7859       RecordLayout[FieldIndex] = Base;
7860     }
7861     // Fill in all the fields.
7862     assert(!RD->isUnion() && "Unexpected union.");
7863     for (const auto *Field : RD->fields()) {
7864       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7865       // will fill in later.)
7866       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7867         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7868         RecordLayout[FieldIndex] = Field;
7869       }
7870     }
7871     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7872              &Data : RecordLayout) {
7873       if (Data.isNull())
7874         continue;
7875       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7876         getPlainLayout(Base, Layout, /*AsBase=*/true);
7877       else
7878         Layout.push_back(Data.get<const FieldDecl *>());
7879     }
7880   }
7881 
7882 public:
7883   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7884       : CurDir(&Dir), CGF(CGF) {
7885     // Extract firstprivate clause information.
7886     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7887       for (const auto *D : C->varlists())
7888         FirstPrivateDecls.try_emplace(
7889             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7890     // Extract implicit firstprivates from uses_allocators clauses.
7891     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
7892       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
7893         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
7894         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
7895           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
7896                                         /*Implicit=*/true);
7897         else if (const auto *VD = dyn_cast<VarDecl>(
7898                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
7899                          ->getDecl()))
7900           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
7901       }
7902     }
7903     // Extract device pointer clause information.
7904     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7905       for (auto L : C->component_lists())
7906         DevPointersMap[L.first].push_back(L.second);
7907   }
7908 
  /// Constructor for the declare mapper directive.
  ///
  /// Only stores the mapper declaration \p Dir and the code generation
  /// function \p CGF; unlike the executable directive constructor, the body is
  /// empty, so no clause information is collected here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
7912 
7913   /// Generate code for the combined entry if we have a partially mapped struct
7914   /// and take care of the mapping flags of the arguments corresponding to
7915   /// individual struct members.
7916   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7917                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7918                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7919                          const StructRangeInfoTy &PartialStruct) const {
7920     // Base is the base of the struct
7921     BasePointers.push_back(PartialStruct.Base.getPointer());
7922     // Pointer is the address of the lowest element
7923     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7924     Pointers.push_back(LB);
7925     // Size is (addr of {highest+1} element) - (addr of lowest element)
7926     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7927     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7928     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7929     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7930     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7931     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7932                                                   /*isSigned=*/false);
7933     Sizes.push_back(Size);
7934     // Map type is always TARGET_PARAM
7935     Types.push_back(OMP_MAP_TARGET_PARAM);
7936     // Remove TARGET_PARAM flag from the first element
7937     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7938 
7939     // All other current entries will be MEMBER_OF the combined entry
7940     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7941     // 0xFFFF in the MEMBER_OF field).
7942     OpenMPOffloadMappingFlags MemberOfFlag =
7943         getMemberOfFlag(BasePointers.size() - 1);
7944     for (auto &M : CurTypes)
7945       setCorrectMemberOfFlag(M, MemberOfFlag);
7946   }
7947 
7948   /// Generate all the base pointers, section pointers, sizes and map
7949   /// types for the extracted mappable expressions. Also, for each item that
7950   /// relates with a device pointer, a pair of the relevant declaration and
7951   /// index where it occurs is appended to the device pointers info array.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate to the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map keyed by the
    // canonical declaration (a null key is used for lists without one).
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. The declaration is canonicalized before it is used as a key.
    auto &&InfoGen =
        [&Info](const ValueDecl *D,
                OMPClauseMappableExprCommon::MappableExprComponentListRef L,
                OpenMPMapClauseKind MapType,
                ArrayRef<OpenMPMapModifierKind> MapModifiers,
                bool ReturnDevicePointer, bool IsImplicit,
                bool ForDeviceAddr = false) {
          const ValueDecl *VD =
              D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
          Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                                IsImplicit, ForDeviceAddr);
        };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect the component lists of all 'map' clauses, keeping the map type
    // and modifiers given on each clause.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    // 'to' clauses are recorded as maps of type 'to' without modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    // 'from' clauses are recorded as maps of type 'from' without modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        // The last component of the list supplies the declaration and the
        // expression used for the lookup below.
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          // Not a struct member: emit the entry directly into the output
          // arrays. The pointer value is loaded from the expression and used
          // both as base pointer and as pointer, with a zero size.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        // Handle each declaration only once, even if it appears in several
        // use_device_addr list items.
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit(),
                  /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          // Unlike use_device_ptr, no pointer value is loaded here: the
          // address of the expression is used when it is a glvalue, otherwise
          // its scalar value.
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    // Generate the entries declaration by declaration, in the order in which
    // the declarations were first inserted into Info.
    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays. The entries of this declaration are
      // collected here and appended to the output arrays only after the
      // combined entry (if any) has been handled below.
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        generateInfoForComponentList(
            L.MapType, L.MapModifiers, L.Components, CurBasePointers,
            CurPointers, CurSizes, CurTypes, PartialStruct,
            IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag. Both are applied to
        // the first entry generated for this component list.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            // use_device_addr: base pointer and pointer are the same value,
            // the address of the member (or its scalar value if it is not a
            // glvalue).
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurTypes.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            // use_device_ptr: the base pointer is the address of the member,
            // the pointer is the value loaded from it.
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
            // value MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                               OMP_MAP_MEMBER_OF);
          }
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry. Note that
      // this call is given the output arrays, before the entries of this
      // declaration are appended to them below.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }
8205 
8206   /// Generate all the base pointers, section pointers, sizes and map types for
8207   /// the extracted map clauses of user-defined mapper.
8208   void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8209                                 MapValuesArrayTy &Pointers,
8210                                 MapValuesArrayTy &Sizes,
8211                                 MapFlagsArrayTy &Types) const {
8212     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8213            "Expect a declare mapper directive");
8214     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8215     // We have to process the component lists that relate with the same
8216     // declaration in a single chunk so that we can generate the map flags
8217     // correctly. Therefore, we organize all lists in a map.
8218     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8219 
8220     // Helper function to fill the information map for the different supported
8221     // clauses.
8222     auto &&InfoGen = [&Info](
8223         const ValueDecl *D,
8224         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8225         OpenMPMapClauseKind MapType,
8226         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8227         bool ReturnDevicePointer, bool IsImplicit) {
8228       const ValueDecl *VD =
8229           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8230       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8231                             IsImplicit);
8232     };
8233 
8234     for (const auto *C : CurMapperDir->clauselists()) {
8235       const auto *MC = cast<OMPMapClause>(C);
8236       for (const auto L : MC->component_lists()) {
8237         InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8238                 /*ReturnDevicePointer=*/false, MC->isImplicit());
8239       }
8240     }
8241 
8242     for (const auto &M : Info) {
8243       // We need to know when we generate information for the first component
8244       // associated with a capture, because the mapping flags depend on it.
8245       bool IsFirstComponentList = true;
8246 
8247       // Temporary versions of arrays
8248       MapBaseValuesArrayTy CurBasePointers;
8249       MapValuesArrayTy CurPointers;
8250       MapValuesArrayTy CurSizes;
8251       MapFlagsArrayTy CurTypes;
8252       StructRangeInfoTy PartialStruct;
8253 
8254       for (const MapInfo &L : M.second) {
8255         assert(!L.Components.empty() &&
8256                "Not expecting declaration with no component lists.");
8257         generateInfoForComponentList(
8258             L.MapType, L.MapModifiers, L.Components, CurBasePointers,
8259             CurPointers, CurSizes, CurTypes, PartialStruct,
8260             IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
8261         IsFirstComponentList = false;
8262       }
8263 
8264       // If there is an entry in PartialStruct it means we have a struct with
8265       // individual members mapped. Emit an extra combined entry.
8266       if (PartialStruct.Base.isValid())
8267         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8268                           PartialStruct);
8269 
8270       // We need to append the results of this capture to what we already have.
8271       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8272       Pointers.append(CurPointers.begin(), CurPointers.end());
8273       Sizes.append(CurSizes.begin(), CurSizes.end());
8274       Types.append(CurTypes.begin(), CurTypes.end());
8275     }
8276   }
8277 
8278   /// Emit capture info for lambdas for variables captured by reference.
8279   void generateInfoForLambdaCaptures(
8280       const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8281       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8282       MapFlagsArrayTy &Types,
8283       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8284     const auto *RD = VD->getType()
8285                          .getCanonicalType()
8286                          .getNonReferenceType()
8287                          ->getAsCXXRecordDecl();
8288     if (!RD || !RD->isLambda())
8289       return;
8290     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8291     LValue VDLVal = CGF.MakeAddrLValue(
8292         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8293     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8294     FieldDecl *ThisCapture = nullptr;
8295     RD->getCaptureFields(Captures, ThisCapture);
8296     if (ThisCapture) {
8297       LValue ThisLVal =
8298           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8299       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8300       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8301                                  VDLVal.getPointer(CGF));
8302       BasePointers.push_back(ThisLVal.getPointer(CGF));
8303       Pointers.push_back(ThisLValVal.getPointer(CGF));
8304       Sizes.push_back(
8305           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8306                                     CGF.Int64Ty, /*isSigned=*/true));
8307       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8308                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8309     }
8310     for (const LambdaCapture &LC : RD->captures()) {
8311       if (!LC.capturesVariable())
8312         continue;
8313       const VarDecl *VD = LC.getCapturedVar();
8314       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8315         continue;
8316       auto It = Captures.find(VD);
8317       assert(It != Captures.end() && "Found lambda capture without field.");
8318       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8319       if (LC.getCaptureKind() == LCK_ByRef) {
8320         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8321         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8322                                    VDLVal.getPointer(CGF));
8323         BasePointers.push_back(VarLVal.getPointer(CGF));
8324         Pointers.push_back(VarLValVal.getPointer(CGF));
8325         Sizes.push_back(CGF.Builder.CreateIntCast(
8326             CGF.getTypeSize(
8327                 VD->getType().getCanonicalType().getNonReferenceType()),
8328             CGF.Int64Ty, /*isSigned=*/true));
8329       } else {
8330         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8331         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8332                                    VDLVal.getPointer(CGF));
8333         BasePointers.push_back(VarLVal.getPointer(CGF));
8334         Pointers.push_back(VarRVal.getScalarVal());
8335         Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8336       }
8337       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8338                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8339     }
8340   }
8341 
8342   /// Set correct indices for lambdas captures.
8343   void adjustMemberOfForLambdaCaptures(
8344       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8345       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8346       MapFlagsArrayTy &Types) const {
8347     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8348       // Set correct member_of idx for all implicit lambda captures.
8349       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8350                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8351         continue;
8352       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8353       assert(BasePtr && "Unable to find base lambda address.");
8354       int TgtIdx = -1;
8355       for (unsigned J = I; J > 0; --J) {
8356         unsigned Idx = J - 1;
8357         if (Pointers[Idx] != BasePtr)
8358           continue;
8359         TgtIdx = Idx;
8360         break;
8361       }
8362       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8363       // All other current entries will be MEMBER_OF the combined entry
8364       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8365       // 0xFFFF in the MEMBER_OF field).
8366       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8367       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8368     }
8369   }
8370 
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  ///
  /// \param Cap The capture to generate information for; it must not capture
  /// a variable array type.
  /// \param Arg The value passed for the capture; used directly when the
  /// captured declaration is listed in DevPointersMap.
  /// \param BasePointers, Pointers, Sizes, Types Output arrays the generated
  /// entries are appended to.
  /// \param PartialStruct Forwarded to generateInfoForComponentList for every
  /// component list of the capture.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // Canonical declaration of the captured variable; null when the capture
    // is 'this'.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // One record per component list that refers to VD: the list itself, the
    // map type and modifiers of its clause, and whether the clause is
    // implicit.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // The map is keyed by the address of the base list's record inside
    // DeclComponentLists, which is not modified from here on.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      // Compare L only against the lists that follow it, so that every pair
      // is visited once.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // Only Components1 is needed from L1; the other fields are unpacked
        // into the outer locals, which are not read again in these loops.
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        // Walk both lists from their last component towards their first one
        // for as long as the components agree.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The list that was fully consumed is the base; the other list is
          // recorded as an element overlapping with it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // Layout holds the fields of VD's record type and is only consulted by
    // the comparator for fields that belong to different parents.
    // NOTE(review): VD and RD are dereferenced without a null check here;
    // this presumes VD is non-null and has a record type whenever overlaps
    // were found - confirm against the callers.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the common part of both lists, starting from their last
            // components.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // The lists diverge at two different fields. Within one parent
            // the field index decides; otherwise whichever of the two fields
            // comes first in Layout is the smaller one.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // NOTE(review): It is dereferenced without comparing it against
            // Layout.end(); presumably one of the two fields is always part
            // of Layout - confirm.
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Go through all of the elements with the overlapped elements. Each of
    // them is generated as the first component list, because the mapping
    // flags depend on it.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(
          MapType, MapModifiers, Components, BasePointers, Pointers, Sizes,
          Types, PartialStruct, IsFirstComponentList, IsImplicit,
          /*ForDeviceAddr=*/false, OverlappedComponents);
    }
    // Go through other elements without overlapped elements. A list only
    // counts as the first component list if nothing was generated by the loop
    // above and it is the first list of the declaration.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      // Lists that act as a base of overlapped elements were already handled
      // by the previous loop.
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
8548 
8549   /// Generate the base pointers, section pointers, sizes and map types
8550   /// associated with the declare target link variables.
8551   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8552                                         MapValuesArrayTy &Pointers,
8553                                         MapValuesArrayTy &Sizes,
8554                                         MapFlagsArrayTy &Types) const {
8555     assert(CurDir.is<const OMPExecutableDirective *>() &&
8556            "Expect a executable directive");
8557     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8558     // Map other list items in the map clause which are not captured variables
8559     // but "declare target link" global variables.
8560     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8561       for (const auto L : C->component_lists()) {
8562         if (!L.first)
8563           continue;
8564         const auto *VD = dyn_cast<VarDecl>(L.first);
8565         if (!VD)
8566           continue;
8567         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8568             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8569         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8570             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8571           continue;
8572         StructRangeInfoTy PartialStruct;
8573         generateInfoForComponentList(
8574             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8575             Pointers, Sizes, Types, PartialStruct,
8576             /*IsFirstComponentList=*/true, C->isImplicit());
8577         assert(!PartialStruct.Base.isValid() &&
8578                "No partial structs for declare target link expected.");
8579       }
8580     }
8581   }
8582 
8583   /// Generate the default map information for a given capture \a CI,
8584   /// record field declaration \a RI and captured value \a CV.
8585   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8586                               const FieldDecl &RI, llvm::Value *CV,
8587                               MapBaseValuesArrayTy &CurBasePointers,
8588                               MapValuesArrayTy &CurPointers,
8589                               MapValuesArrayTy &CurSizes,
8590                               MapFlagsArrayTy &CurMapTypes) const {
8591     bool IsImplicit = true;
8592     // Do the default mapping.
8593     if (CI.capturesThis()) {
8594       CurBasePointers.push_back(CV);
8595       CurPointers.push_back(CV);
8596       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8597       CurSizes.push_back(
8598           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8599                                     CGF.Int64Ty, /*isSigned=*/true));
8600       // Default map type.
8601       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8602     } else if (CI.capturesVariableByCopy()) {
8603       CurBasePointers.push_back(CV);
8604       CurPointers.push_back(CV);
8605       if (!RI.getType()->isAnyPointerType()) {
8606         // We have to signal to the runtime captures passed by value that are
8607         // not pointers.
8608         CurMapTypes.push_back(OMP_MAP_LITERAL);
8609         CurSizes.push_back(CGF.Builder.CreateIntCast(
8610             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8611       } else {
8612         // Pointers are implicitly mapped with a zero size and no flags
8613         // (other than first map that is added for all implicit maps).
8614         CurMapTypes.push_back(OMP_MAP_NONE);
8615         CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8616       }
8617       const VarDecl *VD = CI.getCapturedVar();
8618       auto I = FirstPrivateDecls.find(VD);
8619       if (I != FirstPrivateDecls.end())
8620         IsImplicit = I->getSecond();
8621     } else {
8622       assert(CI.capturesVariable() && "Expected captured reference.");
8623       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8624       QualType ElementType = PtrTy->getPointeeType();
8625       CurSizes.push_back(CGF.Builder.CreateIntCast(
8626           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8627       // The default map type for a scalar/complex type is 'to' because by
8628       // default the value doesn't have to be retrieved. For an aggregate
8629       // type, the default is 'tofrom'.
8630       CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
8631       const VarDecl *VD = CI.getCapturedVar();
8632       auto I = FirstPrivateDecls.find(VD);
8633       if (I != FirstPrivateDecls.end() &&
8634           VD->getType().isConstant(CGF.getContext())) {
8635         llvm::Constant *Addr =
8636             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8637         // Copy the value of the original variable to the new global copy.
8638         CGF.Builder.CreateMemCpy(
8639             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
8640             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8641             CurSizes.back(), /*IsVolatile=*/false);
8642         // Use new global variable as the base pointers.
8643         CurBasePointers.push_back(Addr);
8644         CurPointers.push_back(Addr);
8645       } else {
8646         CurBasePointers.push_back(CV);
8647         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8648           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8649               CV, ElementType, CGF.getContext().getDeclAlign(VD),
8650               AlignmentSource::Decl));
8651           CurPointers.push_back(PtrAddr.getPointer());
8652         } else {
8653           CurPointers.push_back(CV);
8654         }
8655       }
8656       if (I != FirstPrivateDecls.end())
8657         IsImplicit = I->getSecond();
8658     }
8659     // Every default map produces a single argument which is a target parameter.
8660     CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
8661 
8662     // Add flag stating this is an implicit map.
8663     if (IsImplicit)
8664       CurMapTypes.back() |= OMP_MAP_IMPLICIT;
8665   }
8666 };
8667 } // anonymous namespace
8668 
8669 /// Emit the arrays used to pass the captures and map information to the
8670 /// offloading runtime library. If there is no map or capture information,
8671 /// return nullptr by reference.
8672 static void
8673 emitOffloadingArrays(CodeGenFunction &CGF,
8674                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
8675                      MappableExprsHandler::MapValuesArrayTy &Pointers,
8676                      MappableExprsHandler::MapValuesArrayTy &Sizes,
8677                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
8678                      CGOpenMPRuntime::TargetDataInfo &Info) {
8679   CodeGenModule &CGM = CGF.CGM;
8680   ASTContext &Ctx = CGF.getContext();
8681 
8682   // Reset the array information.
8683   Info.clearArrayInfo();
8684   Info.NumberOfPtrs = BasePointers.size();
8685 
8686   if (Info.NumberOfPtrs) {
8687     // Detect if we have any capture size requiring runtime evaluation of the
8688     // size so that a constant array could be eventually used.
8689     bool hasRuntimeEvaluationCaptureSize = false;
8690     for (llvm::Value *S : Sizes)
8691       if (!isa<llvm::Constant>(S)) {
8692         hasRuntimeEvaluationCaptureSize = true;
8693         break;
8694       }
8695 
8696     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8697     QualType PointerArrayType = Ctx.getConstantArrayType(
8698         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
8699         /*IndexTypeQuals=*/0);
8700 
8701     Info.BasePointersArray =
8702         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8703     Info.PointersArray =
8704         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8705 
8706     // If we don't have any VLA types or other types that require runtime
8707     // evaluation, we can use a constant array for the map sizes, otherwise we
8708     // need to fill up the arrays as we do for the pointers.
8709     QualType Int64Ty =
8710         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8711     if (hasRuntimeEvaluationCaptureSize) {
8712       QualType SizeArrayType = Ctx.getConstantArrayType(
8713           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
8714           /*IndexTypeQuals=*/0);
8715       Info.SizesArray =
8716           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8717     } else {
8718       // We expect all the sizes to be constant, so we collect them to create
8719       // a constant array.
8720       SmallVector<llvm::Constant *, 16> ConstSizes;
8721       for (llvm::Value *S : Sizes)
8722         ConstSizes.push_back(cast<llvm::Constant>(S));
8723 
8724       auto *SizesArrayInit = llvm::ConstantArray::get(
8725           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8726       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8727       auto *SizesArrayGbl = new llvm::GlobalVariable(
8728           CGM.getModule(), SizesArrayInit->getType(),
8729           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8730           SizesArrayInit, Name);
8731       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8732       Info.SizesArray = SizesArrayGbl;
8733     }
8734 
8735     // The map types are always constant so we don't need to generate code to
8736     // fill arrays. Instead, we create an array constant.
8737     SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8738     llvm::copy(MapTypes, Mapping.begin());
8739     llvm::Constant *MapTypesArrayInit =
8740         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8741     std::string MaptypesName =
8742         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8743     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8744         CGM.getModule(), MapTypesArrayInit->getType(),
8745         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8746         MapTypesArrayInit, MaptypesName);
8747     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8748     Info.MapTypesArray = MapTypesArrayGbl;
8749 
8750     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8751       llvm::Value *BPVal = *BasePointers[I];
8752       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8753           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8754           Info.BasePointersArray, 0, I);
8755       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8756           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8757       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8758       CGF.Builder.CreateStore(BPVal, BPAddr);
8759 
8760       if (Info.requiresDevicePointerInfo())
8761         if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8762           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8763 
8764       llvm::Value *PVal = Pointers[I];
8765       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8766           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8767           Info.PointersArray, 0, I);
8768       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8769           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8770       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8771       CGF.Builder.CreateStore(PVal, PAddr);
8772 
8773       if (hasRuntimeEvaluationCaptureSize) {
8774         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8775             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8776             Info.SizesArray,
8777             /*Idx0=*/0,
8778             /*Idx1=*/I);
8779         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8780         CGF.Builder.CreateStore(
8781             CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8782             SAddr);
8783       }
8784     }
8785   }
8786 }
8787 
8788 /// Emit the arguments to be passed to the runtime library based on the
8789 /// arrays of pointers, sizes and map types.
8790 static void emitOffloadingArraysArgument(
8791     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8792     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8793     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8794   CodeGenModule &CGM = CGF.CGM;
8795   if (Info.NumberOfPtrs) {
8796     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8797         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8798         Info.BasePointersArray,
8799         /*Idx0=*/0, /*Idx1=*/0);
8800     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8801         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8802         Info.PointersArray,
8803         /*Idx0=*/0,
8804         /*Idx1=*/0);
8805     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8806         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8807         /*Idx0=*/0, /*Idx1=*/0);
8808     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8809         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8810         Info.MapTypesArray,
8811         /*Idx0=*/0,
8812         /*Idx1=*/0);
8813   } else {
8814     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8815     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8816     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8817     MapTypesArrayArg =
8818         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8819   }
8820 }
8821 
8822 /// Check for inner distribute directive.
8823 static const OMPExecutableDirective *
8824 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8825   const auto *CS = D.getInnermostCapturedStmt();
8826   const auto *Body =
8827       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8828   const Stmt *ChildStmt =
8829       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8830 
8831   if (const auto *NestedDir =
8832           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8833     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8834     switch (D.getDirectiveKind()) {
8835     case OMPD_target:
8836       if (isOpenMPDistributeDirective(DKind))
8837         return NestedDir;
8838       if (DKind == OMPD_teams) {
8839         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8840             /*IgnoreCaptured=*/true);
8841         if (!Body)
8842           return nullptr;
8843         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8844         if (const auto *NND =
8845                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8846           DKind = NND->getDirectiveKind();
8847           if (isOpenMPDistributeDirective(DKind))
8848             return NND;
8849         }
8850       }
8851       return nullptr;
8852     case OMPD_target_teams:
8853       if (isOpenMPDistributeDirective(DKind))
8854         return NestedDir;
8855       return nullptr;
8856     case OMPD_target_parallel:
8857     case OMPD_target_simd:
8858     case OMPD_target_parallel_for:
8859     case OMPD_target_parallel_for_simd:
8860       return nullptr;
8861     case OMPD_target_teams_distribute:
8862     case OMPD_target_teams_distribute_simd:
8863     case OMPD_target_teams_distribute_parallel_for:
8864     case OMPD_target_teams_distribute_parallel_for_simd:
8865     case OMPD_parallel:
8866     case OMPD_for:
8867     case OMPD_parallel_for:
8868     case OMPD_parallel_master:
8869     case OMPD_parallel_sections:
8870     case OMPD_for_simd:
8871     case OMPD_parallel_for_simd:
8872     case OMPD_cancel:
8873     case OMPD_cancellation_point:
8874     case OMPD_ordered:
8875     case OMPD_threadprivate:
8876     case OMPD_allocate:
8877     case OMPD_task:
8878     case OMPD_simd:
8879     case OMPD_sections:
8880     case OMPD_section:
8881     case OMPD_single:
8882     case OMPD_master:
8883     case OMPD_critical:
8884     case OMPD_taskyield:
8885     case OMPD_barrier:
8886     case OMPD_taskwait:
8887     case OMPD_taskgroup:
8888     case OMPD_atomic:
8889     case OMPD_flush:
8890     case OMPD_depobj:
8891     case OMPD_scan:
8892     case OMPD_teams:
8893     case OMPD_target_data:
8894     case OMPD_target_exit_data:
8895     case OMPD_target_enter_data:
8896     case OMPD_distribute:
8897     case OMPD_distribute_simd:
8898     case OMPD_distribute_parallel_for:
8899     case OMPD_distribute_parallel_for_simd:
8900     case OMPD_teams_distribute:
8901     case OMPD_teams_distribute_simd:
8902     case OMPD_teams_distribute_parallel_for:
8903     case OMPD_teams_distribute_parallel_for_simd:
8904     case OMPD_target_update:
8905     case OMPD_declare_simd:
8906     case OMPD_declare_variant:
8907     case OMPD_begin_declare_variant:
8908     case OMPD_end_declare_variant:
8909     case OMPD_declare_target:
8910     case OMPD_end_declare_target:
8911     case OMPD_declare_reduction:
8912     case OMPD_declare_mapper:
8913     case OMPD_taskloop:
8914     case OMPD_taskloop_simd:
8915     case OMPD_master_taskloop:
8916     case OMPD_master_taskloop_simd:
8917     case OMPD_parallel_master_taskloop:
8918     case OMPD_parallel_master_taskloop_simd:
8919     case OMPD_requires:
8920     case OMPD_unknown:
8921     default:
8922       llvm_unreachable("Unexpected directive.");
8923     }
8924   }
8925 
8926   return nullptr;
8927 }
8928 
8929 /// Emit the user-defined mapper function. The code generation follows the
8930 /// pattern in the example below.
8931 /// \code
8932 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8933 ///                                           void *base, void *begin,
8934 ///                                           int64_t size, int64_t type) {
8935 ///   // Allocate space for an array section first.
8936 ///   if (size > 1 && !maptype.IsDelete)
8937 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8938 ///                                 size*sizeof(Ty), clearToFrom(type));
8939 ///   // Map members.
8940 ///   for (unsigned i = 0; i < size; i++) {
8941 ///     // For each component specified by this mapper:
8942 ///     for (auto c : all_components) {
8943 ///       if (c.hasMapper())
8944 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8945 ///                       c.arg_type);
8946 ///       else
8947 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
8948 ///                                     c.arg_begin, c.arg_size, c.arg_type);
8949 ///     }
8950 ///   }
8951 ///   // Delete the array section.
8952 ///   if (size > 1 && maptype.IsDelete)
8953 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8954 ///                                 size*sizeof(Ty), clearToFrom(type));
8955 /// }
8956 /// \endcode
8957 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
8958                                             CodeGenFunction *CGF) {
8959   if (UDMMap.count(D) > 0)
8960     return;
8961   ASTContext &C = CGM.getContext();
8962   QualType Ty = D->getType();
8963   QualType PtrTy = C.getPointerType(Ty).withRestrict();
8964   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
8965   auto *MapperVarDecl =
8966       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
8967   SourceLocation Loc = D->getLocation();
8968   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
8969 
8970   // Prepare mapper function arguments and attributes.
8971   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8972                               C.VoidPtrTy, ImplicitParamDecl::Other);
8973   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
8974                             ImplicitParamDecl::Other);
8975   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8976                              C.VoidPtrTy, ImplicitParamDecl::Other);
8977   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8978                             ImplicitParamDecl::Other);
8979   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8980                             ImplicitParamDecl::Other);
8981   FunctionArgList Args;
8982   Args.push_back(&HandleArg);
8983   Args.push_back(&BaseArg);
8984   Args.push_back(&BeginArg);
8985   Args.push_back(&SizeArg);
8986   Args.push_back(&TypeArg);
8987   const CGFunctionInfo &FnInfo =
8988       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
8989   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
8990   SmallString<64> TyStr;
8991   llvm::raw_svector_ostream Out(TyStr);
8992   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
8993   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
8994   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
8995                                     Name, &CGM.getModule());
8996   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
8997   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
8998   // Start the mapper function code generation.
8999   CodeGenFunction MapperCGF(CGM);
9000   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9001   // Compute the starting and end addreses of array elements.
9002   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9003       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9004       C.getPointerType(Int64Ty), Loc);
9005   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9006       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
9007       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
9008   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
9009   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9010       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9011       C.getPointerType(Int64Ty), Loc);
9012   // Prepare common arguments for array initiation and deletion.
9013   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9014       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9015       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9016   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9017       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9018       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9019   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9020       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9021       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9022 
9023   // Emit array initiation if this is an array section and \p MapType indicates
9024   // that memory allocation is required.
9025   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9026   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9027                              ElementSize, HeadBB, /*IsInit=*/true);
9028 
9029   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9030 
9031   // Emit the loop header block.
9032   MapperCGF.EmitBlock(HeadBB);
9033   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9034   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9035   // Evaluate whether the initial condition is satisfied.
9036   llvm::Value *IsEmpty =
9037       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9038   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9039   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9040 
9041   // Emit the loop body block.
9042   MapperCGF.EmitBlock(BodyBB);
9043   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9044       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9045   PtrPHI->addIncoming(PtrBegin, EntryBB);
9046   Address PtrCurrent =
9047       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9048                           .getAlignment()
9049                           .alignmentOfArrayElement(ElementSize));
9050   // Privatize the declared variable of mapper to be the current array element.
9051   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9052   Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
9053     return MapperCGF
9054         .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
9055         .getAddress(MapperCGF);
9056   });
9057   (void)Scope.Privatize();
9058 
9059   // Get map clause information. Fill up the arrays with all mapped variables.
9060   MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9061   MappableExprsHandler::MapValuesArrayTy Pointers;
9062   MappableExprsHandler::MapValuesArrayTy Sizes;
9063   MappableExprsHandler::MapFlagsArrayTy MapTypes;
9064   MappableExprsHandler MEHandler(*D, MapperCGF);
9065   MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
9066 
9067   // Call the runtime API __tgt_mapper_num_components to get the number of
9068   // pre-existing components.
9069   llvm::Value *OffloadingArgs[] = {Handle};
9070   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9071       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
9072           CGM.getModule(), OMPRTL___tgt_mapper_num_components),
9073       OffloadingArgs);
9074   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9075       PreviousSize,
9076       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9077 
9078   // Fill up the runtime mapper handle for all components.
9079   for (unsigned I = 0; I < BasePointers.size(); ++I) {
9080     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9081         *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9082     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9083         Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9084     llvm::Value *CurSizeArg = Sizes[I];
9085 
9086     // Extract the MEMBER_OF field from the map type.
9087     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
9088     MapperCGF.EmitBlock(MemberBB);
9089     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
9090     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
9091         OriMapType,
9092         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
9093     llvm::BasicBlock *MemberCombineBB =
9094         MapperCGF.createBasicBlock("omp.member.combine");
9095     llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
9096     llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
9097     MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
9098     // Add the number of pre-existing components to the MEMBER_OF field if it
9099     // is valid.
9100     MapperCGF.EmitBlock(MemberCombineBB);
9101     llvm::Value *CombinedMember =
9102         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9103     // Do nothing if it is not a member of previous components.
9104     MapperCGF.EmitBlock(TypeBB);
9105     llvm::PHINode *MemberMapType =
9106         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
9107     MemberMapType->addIncoming(OriMapType, MemberBB);
9108     MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
9109 
9110     // Combine the map type inherited from user-defined mapper with that
9111     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9112     // bits of the \a MapType, which is the input argument of the mapper
9113     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9114     // bits of MemberMapType.
9115     // [OpenMP 5.0], 1.2.6. map-type decay.
9116     //        | alloc |  to   | from  | tofrom | release | delete
9117     // ----------------------------------------------------------
9118     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9119     // to     | alloc |  to   | alloc |   to   | release | delete
9120     // from   | alloc | alloc | from  |  from  | release | delete
9121     // tofrom | alloc |  to   | from  | tofrom | release | delete
9122     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9123         MapType,
9124         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9125                                    MappableExprsHandler::OMP_MAP_FROM));
9126     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9127     llvm::BasicBlock *AllocElseBB =
9128         MapperCGF.createBasicBlock("omp.type.alloc.else");
9129     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9130     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9131     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9132     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9133     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9134     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9135     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9136     MapperCGF.EmitBlock(AllocBB);
9137     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9138         MemberMapType,
9139         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9140                                      MappableExprsHandler::OMP_MAP_FROM)));
9141     MapperCGF.Builder.CreateBr(EndBB);
9142     MapperCGF.EmitBlock(AllocElseBB);
9143     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9144         LeftToFrom,
9145         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9146     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9147     // In case of to, clear OMP_MAP_FROM.
9148     MapperCGF.EmitBlock(ToBB);
9149     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9150         MemberMapType,
9151         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9152     MapperCGF.Builder.CreateBr(EndBB);
9153     MapperCGF.EmitBlock(ToElseBB);
9154     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9155         LeftToFrom,
9156         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9157     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9158     // In case of from, clear OMP_MAP_TO.
9159     MapperCGF.EmitBlock(FromBB);
9160     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9161         MemberMapType,
9162         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9163     // In case of tofrom, do nothing.
9164     MapperCGF.EmitBlock(EndBB);
9165     llvm::PHINode *CurMapType =
9166         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9167     CurMapType->addIncoming(AllocMapType, AllocBB);
9168     CurMapType->addIncoming(ToMapType, ToBB);
9169     CurMapType->addIncoming(FromMapType, FromBB);
9170     CurMapType->addIncoming(MemberMapType, ToElseBB);
9171 
9172     // TODO: call the corresponding mapper function if a user-defined mapper is
9173     // associated with this map clause.
9174     // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9175     // data structure.
9176     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9177                                      CurSizeArg, CurMapType};
9178     MapperCGF.EmitRuntimeCall(
9179         llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
9180             CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9181         OffloadingArgs);
9182   }
9183 
9184   // Update the pointer to point to the next element that needs to be mapped,
9185   // and check whether we have mapped all elements.
9186   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9187       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9188   PtrPHI->addIncoming(PtrNext, BodyBB);
9189   llvm::Value *IsDone =
9190       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9191   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9192   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9193 
9194   MapperCGF.EmitBlock(ExitBB);
9195   // Emit array deletion if this is an array section and \p MapType indicates
9196   // that deletion is required.
9197   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9198                              ElementSize, DoneBB, /*IsInit=*/false);
9199 
9200   // Emit the function exit block.
9201   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9202   MapperCGF.FinishFunction();
9203   UDMMap.try_emplace(D, Fn);
9204   if (CGF) {
9205     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9206     Decls.second.push_back(D);
9207   }
9208 }
9209 
9210 /// Emit the array initialization or deletion portion for user-defined mapper
9211 /// code generation. First, it evaluates whether an array section is mapped and
9212 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9213 /// true, and \a MapType indicates to not delete this array, array
9214 /// initialization code is generated. If \a IsInit is false, and \a MapType
9215 /// indicates to not this array, array deletion code is generated.
9216 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9217     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9218     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9219     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9220   StringRef Prefix = IsInit ? ".init" : ".del";
9221 
9222   // Evaluate if this is an array section.
9223   llvm::BasicBlock *IsDeleteBB =
9224       MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
9225   llvm::BasicBlock *BodyBB =
9226       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9227   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9228       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9229   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9230 
9231   // Evaluate if we are going to delete this section.
9232   MapperCGF.EmitBlock(IsDeleteBB);
9233   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9234       MapType,
9235       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9236   llvm::Value *DeleteCond;
9237   if (IsInit) {
9238     DeleteCond = MapperCGF.Builder.CreateIsNull(
9239         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9240   } else {
9241     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9242         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9243   }
9244   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9245 
9246   MapperCGF.EmitBlock(BodyBB);
9247   // Get the array size by multiplying element size and element number (i.e., \p
9248   // Size).
9249   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9250       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9251   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9252   // memory allocation/deletion purpose only.
9253   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9254       MapType,
9255       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9256                                    MappableExprsHandler::OMP_MAP_FROM)));
9257   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9258   // data structure.
9259   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9260   MapperCGF.EmitRuntimeCall(
9261       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
9262           CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9263       OffloadingArgs);
9264 }
9265 
9266 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9267     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9268     llvm::Value *DeviceID,
9269     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9270                                      const OMPLoopDirective &D)>
9271         SizeEmitter) {
9272   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9273   const OMPExecutableDirective *TD = &D;
9274   // Get nested teams distribute kind directive, if any.
9275   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9276     TD = getNestedDistributeDirective(CGM.getContext(), D);
9277   if (!TD)
9278     return;
9279   const auto *LD = cast<OMPLoopDirective>(TD);
9280   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9281                                                      PrePostActionTy &) {
9282     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9283       llvm::Value *Args[] = {DeviceID, NumIterations};
9284       CGF.EmitRuntimeCall(
9285           llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
9286               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
9287           Args);
9288     }
9289   };
9290   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9291 }
9292 
/// Emit the host-side code that launches the target region of directive \p D.
///
/// When \p OutlinedFnID is non-null, the captured variables are turned into
/// offloading arrays and one of the __tgt_target* runtime entry points is
/// called; if that call returns a non-zero value, \p OutlinedFn is called on
/// the host instead. When \p OutlinedFnID is null, \p OutlinedFn is called
/// directly. If \p IfCond is given, emitIfClause is used to choose between
/// the offloading path and the host-only path. If \p D has 'depend' clauses,
/// both paths are emitted through EmitOMPTargetTaskBasedDirective.
///
/// \param CGF Function in which the code is emitted; nothing is emitted if it
/// has no insert point.
/// \param D Target directive being emitted.
/// \param OutlinedFn Host version of the target region; must be non-null.
/// \param OutlinedFnID Identifier of the target region that is passed to the
/// runtime; may be null, in which case only the host version is called.
/// \param IfCond Expression of the 'if' clause, or null.
/// \param Device Expression of the 'device' clause (pointer part, may be null)
/// together with its modifier (integer part).
/// \param SizeEmitter Callback forwarded to emitTargetNumIterationsCall to
/// emit the number of loop iterations.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // 'depend' clauses route the emission through
  // EmitOMPTargetTaskBasedDirective (see TargetThenGen/TargetElseGen below).
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  // Values of the variables captured by the 'target' region; shared by
  // reference with the lambdas below.
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled in by TargetThenGen and read by
  // ThenGen, which is why both lambdas capture them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        // Regenerate the captured values with the CodeGenFunction at hand.
        // NOTE(review): presumably needed because this code is then emitted
        // inside the task-based directive - confirm.
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      // The device expression is evaluated and sign-extended/truncated to a
      // 64-bit integer.
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device expression: pass the OMP_DEVICEID_UNDEF constant.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    // A 'nowait' clause selects the *_nowait variants of the runtime calls.
    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_teams_nowait
                                         : OMPRTL___tgt_target_teams),
          OffloadingArgs);
    } else {
      // No teams information: use the __tgt_target entry points, which take
      // the same arguments without the team and thread counts.
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
              CGM.getModule(),
              HasNowait ? OMPRTL___tgt_target_nowait : OMPRTL___tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    // A non-zero return value branches to the host fallback block.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Builds the map information for every capture, emits the offloading
  // arrays, publishes them through InputInfo/MapTypesArray and finally runs
  // ThenGen, either inside a target task or as an inlined directive.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk the captures, the fields of the captured record and the captured
    // values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the emitted arrays so that ThenGen can pass them to the runtime.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only counterpart of TargetThenGen: runs ElseGen either inside a
  // target task (with an empty OMPTargetDataInfo) or as an inlined directive.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9586 
9587 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9588                                                     StringRef ParentName) {
9589   if (!S)
9590     return;
9591 
9592   // Codegen OMP target directives that offload compute to the device.
9593   bool RequiresDeviceCodegen =
9594       isa<OMPExecutableDirective>(S) &&
9595       isOpenMPTargetExecutionDirective(
9596           cast<OMPExecutableDirective>(S)->getDirectiveKind());
9597 
9598   if (RequiresDeviceCodegen) {
9599     const auto &E = *cast<OMPExecutableDirective>(S);
9600     unsigned DeviceID;
9601     unsigned FileID;
9602     unsigned Line;
9603     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
9604                              FileID, Line);
9605 
9606     // Is this a target region that should not be emitted as an entry point? If
9607     // so just signal we are done with this target region.
9608     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
9609                                                             ParentName, Line))
9610       return;
9611 
9612     switch (E.getDirectiveKind()) {
9613     case OMPD_target:
9614       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9615                                                    cast<OMPTargetDirective>(E));
9616       break;
9617     case OMPD_target_parallel:
9618       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9619           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9620       break;
9621     case OMPD_target_teams:
9622       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9623           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9624       break;
9625     case OMPD_target_teams_distribute:
9626       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9627           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9628       break;
9629     case OMPD_target_teams_distribute_simd:
9630       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9631           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9632       break;
9633     case OMPD_target_parallel_for:
9634       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9635           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9636       break;
9637     case OMPD_target_parallel_for_simd:
9638       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9639           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9640       break;
9641     case OMPD_target_simd:
9642       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9643           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9644       break;
9645     case OMPD_target_teams_distribute_parallel_for:
9646       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9647           CGM, ParentName,
9648           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9649       break;
9650     case OMPD_target_teams_distribute_parallel_for_simd:
9651       CodeGenFunction::
9652           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9653               CGM, ParentName,
9654               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9655       break;
9656     case OMPD_parallel:
9657     case OMPD_for:
9658     case OMPD_parallel_for:
9659     case OMPD_parallel_master:
9660     case OMPD_parallel_sections:
9661     case OMPD_for_simd:
9662     case OMPD_parallel_for_simd:
9663     case OMPD_cancel:
9664     case OMPD_cancellation_point:
9665     case OMPD_ordered:
9666     case OMPD_threadprivate:
9667     case OMPD_allocate:
9668     case OMPD_task:
9669     case OMPD_simd:
9670     case OMPD_sections:
9671     case OMPD_section:
9672     case OMPD_single:
9673     case OMPD_master:
9674     case OMPD_critical:
9675     case OMPD_taskyield:
9676     case OMPD_barrier:
9677     case OMPD_taskwait:
9678     case OMPD_taskgroup:
9679     case OMPD_atomic:
9680     case OMPD_flush:
9681     case OMPD_depobj:
9682     case OMPD_scan:
9683     case OMPD_teams:
9684     case OMPD_target_data:
9685     case OMPD_target_exit_data:
9686     case OMPD_target_enter_data:
9687     case OMPD_distribute:
9688     case OMPD_distribute_simd:
9689     case OMPD_distribute_parallel_for:
9690     case OMPD_distribute_parallel_for_simd:
9691     case OMPD_teams_distribute:
9692     case OMPD_teams_distribute_simd:
9693     case OMPD_teams_distribute_parallel_for:
9694     case OMPD_teams_distribute_parallel_for_simd:
9695     case OMPD_target_update:
9696     case OMPD_declare_simd:
9697     case OMPD_declare_variant:
9698     case OMPD_begin_declare_variant:
9699     case OMPD_end_declare_variant:
9700     case OMPD_declare_target:
9701     case OMPD_end_declare_target:
9702     case OMPD_declare_reduction:
9703     case OMPD_declare_mapper:
9704     case OMPD_taskloop:
9705     case OMPD_taskloop_simd:
9706     case OMPD_master_taskloop:
9707     case OMPD_master_taskloop_simd:
9708     case OMPD_parallel_master_taskloop:
9709     case OMPD_parallel_master_taskloop_simd:
9710     case OMPD_requires:
9711     case OMPD_unknown:
9712     default:
9713       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9714     }
9715     return;
9716   }
9717 
9718   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9719     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9720       return;
9721 
9722     scanForTargetRegionsFunctions(
9723         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9724     return;
9725   }
9726 
9727   // If this is a lambda function, look into its body.
9728   if (const auto *L = dyn_cast<LambdaExpr>(S))
9729     S = L->getBody();
9730 
9731   // Keep looking for target regions recursively.
9732   for (const Stmt *II : S->children())
9733     scanForTargetRegionsFunctions(II, ParentName);
9734 }
9735 
9736 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9737   // If emitting code for the host, we do not process FD here. Instead we do
9738   // the normal code generation.
9739   if (!CGM.getLangOpts().OpenMPIsDevice) {
9740     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9741       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9742           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9743       // Do not emit device_type(nohost) functions for the host.
9744       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9745         return true;
9746     }
9747     return false;
9748   }
9749 
9750   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9751   // Try to detect target regions in the function.
9752   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9753     StringRef Name = CGM.getMangledName(GD);
9754     scanForTargetRegionsFunctions(FD->getBody(), Name);
9755     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9756         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9757     // Do not emit device_type(nohost) functions for the host.
9758     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9759       return true;
9760   }
9761 
9762   // Do not to emit function if it is not marked as declare target.
9763   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9764          AlreadyEmittedTargetDecls.count(VD) == 0;
9765 }
9766 
9767 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9768   if (!CGM.getLangOpts().OpenMPIsDevice)
9769     return false;
9770 
9771   // Check if there are Ctors/Dtors in this declaration and look for target
9772   // regions in it. We use the complete variant to produce the kernel name
9773   // mangling.
9774   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9775   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9776     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9777       StringRef ParentName =
9778           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9779       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9780     }
9781     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9782       StringRef ParentName =
9783           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9784       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9785     }
9786   }
9787 
9788   // Do not to emit variable if it is not marked as declare target.
9789   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9790       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9791           cast<VarDecl>(GD.getDecl()));
9792   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9793       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9794        HasRequiresUnifiedSharedMemory)) {
9795     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9796     return true;
9797   }
9798   return false;
9799 }
9800 
9801 llvm::Constant *
9802 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9803                                                 const VarDecl *VD) {
9804   assert(VD->getType().isConstant(CGM.getContext()) &&
9805          "Expected constant variable.");
9806   StringRef VarName;
9807   llvm::Constant *Addr;
9808   llvm::GlobalValue::LinkageTypes Linkage;
9809   QualType Ty = VD->getType();
9810   SmallString<128> Buffer;
9811   {
9812     unsigned DeviceID;
9813     unsigned FileID;
9814     unsigned Line;
9815     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9816                              FileID, Line);
9817     llvm::raw_svector_ostream OS(Buffer);
9818     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9819        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9820     VarName = OS.str();
9821   }
9822   Linkage = llvm::GlobalValue::InternalLinkage;
9823   Addr =
9824       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9825                                   getDefaultFirstprivateAddressSpace());
9826   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9827   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9828   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9829   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9830       VarName, Addr, VarSize,
9831       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9832   return Addr;
9833 }
9834 
9835 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9836                                                    llvm::Constant *Addr) {
9837   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9838       !CGM.getLangOpts().OpenMPIsDevice)
9839     return;
9840   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9841       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9842   if (!Res) {
9843     if (CGM.getLangOpts().OpenMPIsDevice) {
9844       // Register non-target variables being emitted in device code (debug info
9845       // may cause this).
9846       StringRef VarName = CGM.getMangledName(VD);
9847       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9848     }
9849     return;
9850   }
9851   // Register declare target variables.
9852   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9853   StringRef VarName;
9854   CharUnits VarSize;
9855   llvm::GlobalValue::LinkageTypes Linkage;
9856 
9857   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9858       !HasRequiresUnifiedSharedMemory) {
9859     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9860     VarName = CGM.getMangledName(VD);
9861     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9862       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9863       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9864     } else {
9865       VarSize = CharUnits::Zero();
9866     }
9867     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9868     // Temp solution to prevent optimizations of the internal variables.
9869     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9870       std::string RefName = getName({VarName, "ref"});
9871       if (!CGM.GetGlobalValue(RefName)) {
9872         llvm::Constant *AddrRef =
9873             getOrCreateInternalVariable(Addr->getType(), RefName);
9874         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9875         GVAddrRef->setConstant(/*Val=*/true);
9876         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9877         GVAddrRef->setInitializer(Addr);
9878         CGM.addCompilerUsedGlobal(GVAddrRef);
9879       }
9880     }
9881   } else {
9882     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9883             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9884              HasRequiresUnifiedSharedMemory)) &&
9885            "Declare target attribute must link or to with unified memory.");
9886     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9887       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9888     else
9889       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9890 
9891     if (CGM.getLangOpts().OpenMPIsDevice) {
9892       VarName = Addr->getName();
9893       Addr = nullptr;
9894     } else {
9895       VarName = getAddrOfDeclareTargetVar(VD).getName();
9896       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9897     }
9898     VarSize = CGM.getPointerSize();
9899     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9900   }
9901 
9902   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9903       VarName, Addr, VarSize, Flags, Linkage);
9904 }
9905 
9906 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9907   if (isa<FunctionDecl>(GD.getDecl()) ||
9908       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9909     return emitTargetFunctions(GD);
9910 
9911   return emitTargetGlobalVariable(GD);
9912 }
9913 
9914 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9915   for (const VarDecl *VD : DeferredGlobalVariables) {
9916     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9917         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9918     if (!Res)
9919       continue;
9920     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9921         !HasRequiresUnifiedSharedMemory) {
9922       CGM.EmitGlobal(VD);
9923     } else {
9924       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9925               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9926                HasRequiresUnifiedSharedMemory)) &&
9927              "Expected link clause or to clause with unified memory.");
9928       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9929     }
9930   }
9931 }
9932 
9933 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9934     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9935   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9936          " Expected target-based directive.");
9937 }
9938 
9939 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
9940   for (const OMPClause *Clause : D->clauselists()) {
9941     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9942       HasRequiresUnifiedSharedMemory = true;
9943     } else if (const auto *AC =
9944                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
9945       switch (AC->getAtomicDefaultMemOrderKind()) {
9946       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
9947         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
9948         break;
9949       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
9950         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
9951         break;
9952       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
9953         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
9954         break;
9955       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
9956         break;
9957       }
9958     }
9959   }
9960 }
9961 
/// Return the atomic ordering currently stored in RequiresAtomicOrdering
/// (updated by processRequiresDirective() when an 'atomic_default_mem_order'
/// clause is seen).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
9965 
9966 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9967                                                        LangAS &AS) {
9968   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9969     return false;
9970   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9971   switch(A->getAllocatorType()) {
9972   case OMPAllocateDeclAttr::OMPNullMemAlloc:
9973   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9974   // Not supported, fallback to the default mem space.
9975   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9976   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9977   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9978   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9979   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9980   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9981   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9982     AS = LangAS::Default;
9983     return true;
9984   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9985     llvm_unreachable("Expected predefined allocator for the variables with the "
9986                      "static storage.");
9987   }
9988   return false;
9989 }
9990 
/// Return the HasRequiresUnifiedSharedMemory flag, which
/// processRequiresDirective() sets when it sees a 'unified_shared_memory'
/// clause.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
9994 
9995 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9996     CodeGenModule &CGM)
9997     : CGM(CGM) {
9998   if (CGM.getLangOpts().OpenMPIsDevice) {
9999     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10000     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10001   }
10002 }
10003 
10004 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10005   if (CGM.getLangOpts().OpenMPIsDevice)
10006     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10007 }
10008 
10009 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10010   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10011     return true;
10012 
10013   const auto *D = cast<FunctionDecl>(GD.getDecl());
10014   // Do not to emit function if it is marked as declare target as it was already
10015   // emitted.
10016   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10017     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10018       if (auto *F = dyn_cast_or_null<llvm::Function>(
10019               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10020         return !F->isDeclaration();
10021       return false;
10022     }
10023     return true;
10024   }
10025 
10026   return !AlreadyEmittedTargetDecls.insert(D).second;
10027 }
10028 
10029 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10030   // If we don't have entries or if we are emitting code for the device, we
10031   // don't need to do anything.
10032   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10033       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10034       (OffloadEntriesInfoManager.empty() &&
10035        !HasEmittedDeclareTargetRegion &&
10036        !HasEmittedTargetRegion))
10037     return nullptr;
10038 
10039   // Create and register the function that handles the requires directives.
10040   ASTContext &C = CGM.getContext();
10041 
10042   llvm::Function *RequiresRegFn;
10043   {
10044     CodeGenFunction CGF(CGM);
10045     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10046     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10047     std::string ReqName = getName({"omp_offloading", "requires_reg"});
10048     RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10049     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10050     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10051     // TODO: check for other requires clauses.
10052     // The requires directive takes effect only when a target region is
10053     // present in the compilation unit. Otherwise it is ignored and not
10054     // passed to the runtime. This avoids the runtime from throwing an error
10055     // for mismatching requires clauses across compilation units that don't
10056     // contain at least 1 target region.
10057     assert((HasEmittedTargetRegion ||
10058             HasEmittedDeclareTargetRegion ||
10059             !OffloadEntriesInfoManager.empty()) &&
10060            "Target or declare target region expected.");
10061     if (HasRequiresUnifiedSharedMemory)
10062       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10063     CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
10064                             CGM.getModule(), OMPRTL___tgt_register_requires),
10065                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10066     CGF.FinishFunction();
10067   }
10068   return RequiresRegFn;
10069 }
10070 
10071 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10072                                     const OMPExecutableDirective &D,
10073                                     SourceLocation Loc,
10074                                     llvm::Function *OutlinedFn,
10075                                     ArrayRef<llvm::Value *> CapturedVars) {
10076   if (!CGF.HaveInsertPoint())
10077     return;
10078 
10079   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10080   CodeGenFunction::RunCleanupsScope Scope(CGF);
10081 
10082   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10083   llvm::Value *Args[] = {
10084       RTLoc,
10085       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10086       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10087   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10088   RealArgs.append(std::begin(Args), std::end(Args));
10089   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10090 
10091   llvm::FunctionCallee RTLFn =
10092       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
10093           CGM.getModule(), OMPRTL___kmpc_fork_teams);
10094   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10095 }
10096 
10097 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10098                                          const Expr *NumTeams,
10099                                          const Expr *ThreadLimit,
10100                                          SourceLocation Loc) {
10101   if (!CGF.HaveInsertPoint())
10102     return;
10103 
10104   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10105 
10106   llvm::Value *NumTeamsVal =
10107       NumTeams
10108           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10109                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10110           : CGF.Builder.getInt32(0);
10111 
10112   llvm::Value *ThreadLimitVal =
10113       ThreadLimit
10114           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10115                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10116           : CGF.Builder.getInt32(0);
10117 
10118   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10119   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10120                                      ThreadLimitVal};
10121   CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
10122                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10123                       PushNumTeamsArgs);
10124 }
10125 
/// Emit the runtime calls that open and close a data environment around the
/// region body produced by \p CodeGen.
///
/// A call to __tgt_target_data_begin is emitted before the body and a call to
/// __tgt_target_data_end after it. When \p IfCond is non-null both calls are
/// emitted through emitIfClause().
///
/// The body is placed as follows:
///  - if Info.CaptureDeviceAddrMap is empty, it is emitted once, between the
///    two runtime calls, with NoPrivAction installed;
///  - otherwise it is emitted right after the begin call and, when an if
///    clause is present, a second copy with NoPrivAction installed is emitted
///    in the 'else' branch.
///
/// \param D Directive whose map information is collected.
/// \param IfCond Expression of the 'if' clause, or null.
/// \param Device Expression of the 'device' clause, or null; when null,
///        OMP_DEVICEID_UNDEF is passed to the runtime.
/// \param CodeGen Generator for the region body.
/// \param Info Filled in by emitOffloadingArrays() while opening the region
///        and read again when closing it.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    // Re-derive the array arguments from Info, which was populated when the
    // region was opened.
    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any. The device expression is evaluated again here.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment, conditionally if an 'if' clause is present.
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment, under the same condition used to open it.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10254 
/// Emit the runtime call for a stand-alone data directive: 'target enter
/// data', 'target exit data' or 'target update'.
///
/// The offloading arrays are built from the map information of \p D, then one
/// of the __tgt_target_data_{begin,end,update} entry points (or its _nowait
/// variant when \p D has a 'nowait' clause) is called. If \p D has a 'depend'
/// clause the call is emitted through EmitOMPTargetTaskBasedDirective();
/// otherwise it is emitted inline. When \p IfCond is non-null everything is
/// emitted in the 'then' branch of the condition and the 'else' branch is
/// empty.
///
/// \param D One of the three stand-alone data directives (asserted).
/// \param IfCond Expression of the 'if' clause, or null.
/// \param Device Expression of the 'device' clause, or null; when null,
///        OMP_DEVICEID_UNDEF is passed to the runtime.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // Shared between the two lambdas below: TargetThenGen fills these in and
  // ThenGen reads them when it builds the runtime call.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the runtime call itself from the already-populated InputInfo and
  // MapTypesArray.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait
                        : OMPRTL___tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait
                        : OMPRTL___tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait
                        : OMPRTL___tgt_target_data_update;
      break;
    // Every other directive kind is rejected; the assertion at the top of the
    // function already restricts D to the three kinds handled above.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
                            CGM.getModule(), RTLFn),
                        OffloadingArgs);
  };

  // Build the offloading arrays, publish them through InputInfo and
  // MapTypesArray, and then run ThenGen either as a task-based directive (when
  // a 'depend' clause is present) or inline.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an 'if' clause nothing is emitted when the condition is false.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10420 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  ///
  /// The name manglers in this file encode the kinds as follows:
  /// LinearWithVarStride as 's' (x86) or "ls" (AArch64) followed by
  /// StrideOrArg, Linear as 'l' (followed by StrideOrArg when it differs from
  /// 1), Uniform as 'u' and Vector as 'v'.
  ///
  /// Note that LinearWithVarStride is the first enumerator and therefore the
  /// value produced by value-initialization (`ParamKindTy{}`).
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; defaults to Vector.
    ParamKindTy Kind = Vector;
    /// Value printed after the kind letter for the two linear kinds.
    llvm::APSInt StrideOrArg;
    /// Alignment of the parameter; when non-zero it is mangled as 'a'
    /// followed by the value.
    llvm::APSInt Alignment;
  };
} // namespace
10431 
10432 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10433                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10434   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10435   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10436   // of that clause. The VLEN value must be power of 2.
10437   // In other case the notion of the function`s "characteristic data type" (CDT)
10438   // is used to compute the vector length.
10439   // CDT is defined in the following order:
10440   //   a) For non-void function, the CDT is the return type.
10441   //   b) If the function has any non-uniform, non-linear parameters, then the
10442   //   CDT is the type of the first such parameter.
10443   //   c) If the CDT determined by a) or b) above is struct, union, or class
10444   //   type which is pass-by-value (except for the type that maps to the
10445   //   built-in complex data type), the characteristic data type is int.
10446   //   d) If none of the above three cases is applicable, the CDT is int.
10447   // The VLEN is then determined based on the CDT and the size of vector
10448   // register of that ISA for which current vector version is generated. The
10449   // VLEN is computed using the formula below:
10450   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10451   // where vector register size specified in section 3.2.1 Registers and the
10452   // Stack Frame of original AMD64 ABI document.
10453   QualType RetType = FD->getReturnType();
10454   if (RetType.isNull())
10455     return 0;
10456   ASTContext &C = FD->getASTContext();
10457   QualType CDT;
10458   if (!RetType.isNull() && !RetType->isVoidType()) {
10459     CDT = RetType;
10460   } else {
10461     unsigned Offset = 0;
10462     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10463       if (ParamAttrs[Offset].Kind == Vector)
10464         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10465       ++Offset;
10466     }
10467     if (CDT.isNull()) {
10468       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10469         if (ParamAttrs[I + Offset].Kind == Vector) {
10470           CDT = FD->getParamDecl(I)->getType();
10471           break;
10472         }
10473       }
10474     }
10475   }
10476   if (CDT.isNull())
10477     CDT = C.IntTy;
10478   CDT = CDT->getCanonicalTypeUnqualified();
10479   if (CDT->isRecordType() || CDT->isUnionType())
10480     CDT = C.IntTy;
10481   return C.getTypeSize(CDT);
10482 }
10483 
10484 static void
10485 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10486                            const llvm::APSInt &VLENVal,
10487                            ArrayRef<ParamAttrTy> ParamAttrs,
10488                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10489   struct ISADataTy {
10490     char ISA;
10491     unsigned VecRegSize;
10492   };
10493   ISADataTy ISAData[] = {
10494       {
10495           'b', 128
10496       }, // SSE
10497       {
10498           'c', 256
10499       }, // AVX
10500       {
10501           'd', 256
10502       }, // AVX2
10503       {
10504           'e', 512
10505       }, // AVX512
10506   };
10507   llvm::SmallVector<char, 2> Masked;
10508   switch (State) {
10509   case OMPDeclareSimdDeclAttr::BS_Undefined:
10510     Masked.push_back('N');
10511     Masked.push_back('M');
10512     break;
10513   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10514     Masked.push_back('N');
10515     break;
10516   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10517     Masked.push_back('M');
10518     break;
10519   }
10520   for (char Mask : Masked) {
10521     for (const ISADataTy &Data : ISAData) {
10522       SmallString<256> Buffer;
10523       llvm::raw_svector_ostream Out(Buffer);
10524       Out << "_ZGV" << Data.ISA << Mask;
10525       if (!VLENVal) {
10526         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10527         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10528         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10529       } else {
10530         Out << VLENVal;
10531       }
10532       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10533         switch (ParamAttr.Kind){
10534         case LinearWithVarStride:
10535           Out << 's' << ParamAttr.StrideOrArg;
10536           break;
10537         case Linear:
10538           Out << 'l';
10539           if (ParamAttr.StrideOrArg != 1)
10540             Out << ParamAttr.StrideOrArg;
10541           break;
10542         case Uniform:
10543           Out << 'u';
10544           break;
10545         case Vector:
10546           Out << 'v';
10547           break;
10548         }
10549         if (!!ParamAttr.Alignment)
10550           Out << 'a' << ParamAttr.Alignment;
10551       }
10552       Out << '_' << Fn->getName();
10553       Fn->addFnAttr(Out.str());
10554     }
10555   }
10556 }
10557 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64" (AAVFABI),
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10563 
10564 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10565 ///
10566 /// TODO: Need to implement the behavior for reference marked with a
10567 /// var or no linear modifiers (1.b in the section). For this, we
10568 /// need to extend ParamKindTy to support the linear modifiers.
10569 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10570   QT = QT.getCanonicalType();
10571 
10572   if (QT->isVoidType())
10573     return false;
10574 
10575   if (Kind == ParamKindTy::Uniform)
10576     return false;
10577 
10578   if (Kind == ParamKindTy::Linear)
10579     return false;
10580 
10581   // TODO: Handle linear references with modifiers
10582 
10583   if (Kind == ParamKindTy::LinearWithVarStride)
10584     return false;
10585 
10586   return true;
10587 }
10588 
10589 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10590 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10591   QT = QT.getCanonicalType();
10592   unsigned Size = C.getTypeSize(QT);
10593 
10594   // Only scalars and complex within 16 bytes wide set PVB to true.
10595   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10596     return false;
10597 
10598   if (QT->isFloatingType())
10599     return true;
10600 
10601   if (QT->isIntegerType())
10602     return true;
10603 
10604   if (QT->isPointerType())
10605     return true;
10606 
10607   // TODO: Add support for complex types (section 3.1.2, item 2).
10608 
10609   return false;
10610 }
10611 
10612 /// Computes the lane size (LS) of a return type or of an input parameter,
10613 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10614 /// TODO: Add support for references, section 3.2.1, item 1.
10615 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10616   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10617     QualType PTy = QT.getCanonicalType()->getPointeeType();
10618     if (getAArch64PBV(PTy, C))
10619       return C.getTypeSize(PTy);
10620   }
10621   if (getAArch64PBV(QT, C))
10622     return C.getTypeSize(QT);
10623 
10624   return C.getTypeSize(C.getUIntPtrType());
10625 }
10626 
10627 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10628 // signature of the scalar function, as defined in 3.2.2 of the
10629 // AAVFABI.
10630 static std::tuple<unsigned, unsigned, bool>
10631 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10632   QualType RetType = FD->getReturnType().getCanonicalType();
10633 
10634   ASTContext &C = FD->getASTContext();
10635 
10636   bool OutputBecomesInput = false;
10637 
10638   llvm::SmallVector<unsigned, 8> Sizes;
10639   if (!RetType->isVoidType()) {
10640     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10641     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10642       OutputBecomesInput = true;
10643   }
10644   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10645     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10646     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10647   }
10648 
10649   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10650   // The LS of a function parameter / return value can only be a power
10651   // of 2, starting from 8 bits, up to 128.
10652   assert(std::all_of(Sizes.begin(), Sizes.end(),
10653                      [](unsigned Size) {
10654                        return Size == 8 || Size == 16 || Size == 32 ||
10655                               Size == 64 || Size == 128;
10656                      }) &&
10657          "Invalid size");
10658 
10659   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10660                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10661                          OutputBecomesInput);
10662 }
10663 
10664 /// Mangle the parameter part of the vector function name according to
10665 /// their OpenMP classification. The mangling function is defined in
10666 /// section 3.5 of the AAVFABI.
10667 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10668   SmallString<256> Buffer;
10669   llvm::raw_svector_ostream Out(Buffer);
10670   for (const auto &ParamAttr : ParamAttrs) {
10671     switch (ParamAttr.Kind) {
10672     case LinearWithVarStride:
10673       Out << "ls" << ParamAttr.StrideOrArg;
10674       break;
10675     case Linear:
10676       Out << 'l';
10677       // Don't print the step value if it is not present or if it is
10678       // equal to 1.
10679       if (ParamAttr.StrideOrArg != 1)
10680         Out << ParamAttr.StrideOrArg;
10681       break;
10682     case Uniform:
10683       Out << 'u';
10684       break;
10685     case Vector:
10686       Out << 'v';
10687       break;
10688     }
10689 
10690     if (!!ParamAttr.Alignment)
10691       Out << 'a' << ParamAttr.Alignment;
10692   }
10693 
10694   return std::string(Out.str());
10695 }
10696 
10697 // Function used to add the attribute. The parameter `VLEN` is
10698 // templated to allow the use of "x" when targeting scalable functions
10699 // for SVE.
10700 template <typename T>
10701 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10702                                  char ISA, StringRef ParSeq,
10703                                  StringRef MangledName, bool OutputBecomesInput,
10704                                  llvm::Function *Fn) {
10705   SmallString<256> Buffer;
10706   llvm::raw_svector_ostream Out(Buffer);
10707   Out << Prefix << ISA << LMask << VLEN;
10708   if (OutputBecomesInput)
10709     Out << "v";
10710   Out << ParSeq << "_" << MangledName;
10711   Fn->addFnAttr(Out.str());
10712 }
10713 
10714 // Helper function to generate the Advanced SIMD names depending on
10715 // the value of the NDS when simdlen is not present.
10716 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10717                                       StringRef Prefix, char ISA,
10718                                       StringRef ParSeq, StringRef MangledName,
10719                                       bool OutputBecomesInput,
10720                                       llvm::Function *Fn) {
10721   switch (NDS) {
10722   case 8:
10723     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10724                          OutputBecomesInput, Fn);
10725     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10726                          OutputBecomesInput, Fn);
10727     break;
10728   case 16:
10729     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10730                          OutputBecomesInput, Fn);
10731     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10732                          OutputBecomesInput, Fn);
10733     break;
10734   case 32:
10735     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10736                          OutputBecomesInput, Fn);
10737     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10738                          OutputBecomesInput, Fn);
10739     break;
10740   case 64:
10741   case 128:
10742     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10743                          OutputBecomesInput, Fn);
10744     break;
10745   default:
10746     llvm_unreachable("Scalar type is too wide.");
10747   }
10748 }
10749 
10750 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10751 static void emitAArch64DeclareSimdFunction(
10752     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10753     ArrayRef<ParamAttrTy> ParamAttrs,
10754     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10755     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10756 
10757   // Get basic data for building the vector signature.
10758   const auto Data = getNDSWDS(FD, ParamAttrs);
10759   const unsigned NDS = std::get<0>(Data);
10760   const unsigned WDS = std::get<1>(Data);
10761   const bool OutputBecomesInput = std::get<2>(Data);
10762 
10763   // Check the values provided via `simdlen` by the user.
10764   // 1. A `simdlen(1)` doesn't produce vector signatures,
10765   if (UserVLEN == 1) {
10766     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10767         DiagnosticsEngine::Warning,
10768         "The clause simdlen(1) has no effect when targeting aarch64.");
10769     CGM.getDiags().Report(SLoc, DiagID);
10770     return;
10771   }
10772 
10773   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10774   // Advanced SIMD output.
10775   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10776     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10777         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10778                                     "power of 2 when targeting Advanced SIMD.");
10779     CGM.getDiags().Report(SLoc, DiagID);
10780     return;
10781   }
10782 
10783   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10784   // limits.
10785   if (ISA == 's' && UserVLEN != 0) {
10786     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10787       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10788           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10789                                       "lanes in the architectural constraints "
10790                                       "for SVE (min is 128-bit, max is "
10791                                       "2048-bit, by steps of 128-bit)");
10792       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10793       return;
10794     }
10795   }
10796 
10797   // Sort out parameter sequence.
10798   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10799   StringRef Prefix = "_ZGV";
10800   // Generate simdlen from user input (if any).
10801   if (UserVLEN) {
10802     if (ISA == 's') {
10803       // SVE generates only a masked function.
10804       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10805                            OutputBecomesInput, Fn);
10806     } else {
10807       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10808       // Advanced SIMD generates one or two functions, depending on
10809       // the `[not]inbranch` clause.
10810       switch (State) {
10811       case OMPDeclareSimdDeclAttr::BS_Undefined:
10812         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10813                              OutputBecomesInput, Fn);
10814         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10815                              OutputBecomesInput, Fn);
10816         break;
10817       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10818         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10819                              OutputBecomesInput, Fn);
10820         break;
10821       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10822         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10823                              OutputBecomesInput, Fn);
10824         break;
10825       }
10826     }
10827   } else {
10828     // If no user simdlen is provided, follow the AAVFABI rules for
10829     // generating the vector length.
10830     if (ISA == 's') {
10831       // SVE, section 3.4.1, item 1.
10832       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10833                            OutputBecomesInput, Fn);
10834     } else {
10835       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10836       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10837       // two vector names depending on the use of the clause
10838       // `[not]inbranch`.
10839       switch (State) {
10840       case OMPDeclareSimdDeclAttr::BS_Undefined:
10841         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10842                                   OutputBecomesInput, Fn);
10843         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10844                                   OutputBecomesInput, Fn);
10845         break;
10846       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10847         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10848                                   OutputBecomesInput, Fn);
10849         break;
10850       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10851         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10852                                   OutputBecomesInput, Fn);
10853         break;
10854       }
10855     }
10856   }
10857 }
10858 
10859 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10860                                               llvm::Function *Fn) {
10861   ASTContext &C = CGM.getContext();
10862   FD = FD->getMostRecentDecl();
10863   // Map params to their positions in function decl.
10864   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10865   if (isa<CXXMethodDecl>(FD))
10866     ParamPositions.try_emplace(FD, 0);
10867   unsigned ParamPos = ParamPositions.size();
10868   for (const ParmVarDecl *P : FD->parameters()) {
10869     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10870     ++ParamPos;
10871   }
10872   while (FD) {
10873     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10874       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10875       // Mark uniform parameters.
10876       for (const Expr *E : Attr->uniforms()) {
10877         E = E->IgnoreParenImpCasts();
10878         unsigned Pos;
10879         if (isa<CXXThisExpr>(E)) {
10880           Pos = ParamPositions[FD];
10881         } else {
10882           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10883                                 ->getCanonicalDecl();
10884           Pos = ParamPositions[PVD];
10885         }
10886         ParamAttrs[Pos].Kind = Uniform;
10887       }
10888       // Get alignment info.
10889       auto NI = Attr->alignments_begin();
10890       for (const Expr *E : Attr->aligneds()) {
10891         E = E->IgnoreParenImpCasts();
10892         unsigned Pos;
10893         QualType ParmTy;
10894         if (isa<CXXThisExpr>(E)) {
10895           Pos = ParamPositions[FD];
10896           ParmTy = E->getType();
10897         } else {
10898           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10899                                 ->getCanonicalDecl();
10900           Pos = ParamPositions[PVD];
10901           ParmTy = PVD->getType();
10902         }
10903         ParamAttrs[Pos].Alignment =
10904             (*NI)
10905                 ? (*NI)->EvaluateKnownConstInt(C)
10906                 : llvm::APSInt::getUnsigned(
10907                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10908                           .getQuantity());
10909         ++NI;
10910       }
10911       // Mark linear parameters.
10912       auto SI = Attr->steps_begin();
10913       auto MI = Attr->modifiers_begin();
10914       for (const Expr *E : Attr->linears()) {
10915         E = E->IgnoreParenImpCasts();
10916         unsigned Pos;
10917         // Rescaling factor needed to compute the linear parameter
10918         // value in the mangled name.
10919         unsigned PtrRescalingFactor = 1;
10920         if (isa<CXXThisExpr>(E)) {
10921           Pos = ParamPositions[FD];
10922         } else {
10923           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10924                                 ->getCanonicalDecl();
10925           Pos = ParamPositions[PVD];
10926           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
10927             PtrRescalingFactor = CGM.getContext()
10928                                      .getTypeSizeInChars(P->getPointeeType())
10929                                      .getQuantity();
10930         }
10931         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10932         ParamAttr.Kind = Linear;
10933         // Assuming a stride of 1, for `linear` without modifiers.
10934         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
10935         if (*SI) {
10936           Expr::EvalResult Result;
10937           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10938             if (const auto *DRE =
10939                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10940               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10941                 ParamAttr.Kind = LinearWithVarStride;
10942                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10943                     ParamPositions[StridePVD->getCanonicalDecl()]);
10944               }
10945             }
10946           } else {
10947             ParamAttr.StrideOrArg = Result.Val.getInt();
10948           }
10949         }
10950         // If we are using a linear clause on a pointer, we need to
10951         // rescale the value of linear_step with the byte size of the
10952         // pointee type.
10953         if (Linear == ParamAttr.Kind)
10954           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
10955         ++SI;
10956         ++MI;
10957       }
10958       llvm::APSInt VLENVal;
10959       SourceLocation ExprLoc;
10960       const Expr *VLENExpr = Attr->getSimdlen();
10961       if (VLENExpr) {
10962         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10963         ExprLoc = VLENExpr->getExprLoc();
10964       }
10965       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10966       if (CGM.getTriple().isX86()) {
10967         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10968       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10969         unsigned VLEN = VLENVal.getExtValue();
10970         StringRef MangledName = Fn->getName();
10971         if (CGM.getTarget().hasFeature("sve"))
10972           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10973                                          MangledName, 's', 128, Fn, ExprLoc);
10974         if (CGM.getTarget().hasFeature("neon"))
10975           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10976                                          MangledName, 'n', 128, Fn, ExprLoc);
10977       }
10978     }
10979     FD = FD->getPreviousDecl();
10980   }
10981 }
10982 
10983 namespace {
10984 /// Cleanup action for doacross support.
10985 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10986 public:
10987   static const int DoacrossFinArgs = 2;
10988 
10989 private:
10990   llvm::FunctionCallee RTLFn;
10991   llvm::Value *Args[DoacrossFinArgs];
10992 
10993 public:
10994   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10995                     ArrayRef<llvm::Value *> CallArgs)
10996       : RTLFn(RTLFn) {
10997     assert(CallArgs.size() == DoacrossFinArgs);
10998     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10999   }
11000   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11001     if (!CGF.HaveInsertPoint())
11002       return;
11003     CGF.EmitRuntimeCall(RTLFn, Args);
11004   }
11005 };
11006 } // namespace
11007 
11008 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11009                                        const OMPLoopDirective &D,
11010                                        ArrayRef<Expr *> NumIterations) {
11011   if (!CGF.HaveInsertPoint())
11012     return;
11013 
11014   ASTContext &C = CGM.getContext();
11015   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11016   RecordDecl *RD;
11017   if (KmpDimTy.isNull()) {
11018     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
11019     //  kmp_int64 lo; // lower
11020     //  kmp_int64 up; // upper
11021     //  kmp_int64 st; // stride
11022     // };
11023     RD = C.buildImplicitRecord("kmp_dim");
11024     RD->startDefinition();
11025     addFieldToRecordDecl(C, RD, Int64Ty);
11026     addFieldToRecordDecl(C, RD, Int64Ty);
11027     addFieldToRecordDecl(C, RD, Int64Ty);
11028     RD->completeDefinition();
11029     KmpDimTy = C.getRecordType(RD);
11030   } else {
11031     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11032   }
11033   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11034   QualType ArrayTy =
11035       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
11036 
11037   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11038   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11039   enum { LowerFD = 0, UpperFD, StrideFD };
11040   // Fill dims with data.
11041   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11042     LValue DimsLVal = CGF.MakeAddrLValue(
11043         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11044     // dims.upper = num_iterations;
11045     LValue UpperLVal = CGF.EmitLValueForField(
11046         DimsLVal, *std::next(RD->field_begin(), UpperFD));
11047     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11048         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11049         Int64Ty, NumIterations[I]->getExprLoc());
11050     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11051     // dims.stride = 1;
11052     LValue StrideLVal = CGF.EmitLValueForField(
11053         DimsLVal, *std::next(RD->field_begin(), StrideFD));
11054     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11055                           StrideLVal);
11056   }
11057 
11058   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11059   // kmp_int32 num_dims, struct kmp_dim * dims);
11060   llvm::Value *Args[] = {
11061       emitUpdateLocation(CGF, D.getBeginLoc()),
11062       getThreadID(CGF, D.getBeginLoc()),
11063       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11064       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11065           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11066           CGM.VoidPtrTy)};
11067 
11068   llvm::FunctionCallee RTLFn =
11069       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
11070           CGM.getModule(), OMPRTL___kmpc_doacross_init);
11071   CGF.EmitRuntimeCall(RTLFn, Args);
11072   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11073       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11074   llvm::FunctionCallee FiniRTLFn =
11075       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
11076           CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11077   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11078                                              llvm::makeArrayRef(FiniArgs));
11079 }
11080 
11081 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11082                                           const OMPDependClause *C) {
11083   QualType Int64Ty =
11084       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11085   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11086   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11087       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11088   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11089   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11090     const Expr *CounterVal = C->getLoopData(I);
11091     assert(CounterVal);
11092     llvm::Value *CntVal = CGF.EmitScalarConversion(
11093         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11094         CounterVal->getExprLoc());
11095     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11096                           /*Volatile=*/false, Int64Ty);
11097   }
11098   llvm::Value *Args[] = {
11099       emitUpdateLocation(CGF, C->getBeginLoc()),
11100       getThreadID(CGF, C->getBeginLoc()),
11101       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11102   llvm::FunctionCallee RTLFn;
11103   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11104     RTLFn = llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
11105         CGM.getModule(), OMPRTL___kmpc_doacross_post);
11106   } else {
11107     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11108     RTLFn = llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
11109         CGM.getModule(), OMPRTL___kmpc_doacross_wait);
11110   }
11111   CGF.EmitRuntimeCall(RTLFn, Args);
11112 }
11113 
11114 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11115                                llvm::FunctionCallee Callee,
11116                                ArrayRef<llvm::Value *> Args) const {
11117   assert(Loc.isValid() && "Outlined function call location must be valid.");
11118   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11119 
11120   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11121     if (Fn->doesNotThrow()) {
11122       CGF.EmitNounwindRuntimeCall(Fn, Args);
11123       return;
11124     }
11125   }
11126   CGF.EmitRuntimeCall(Callee, Args);
11127 }
11128 
11129 void CGOpenMPRuntime::emitOutlinedFunctionCall(
11130     CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11131     ArrayRef<llvm::Value *> Args) const {
11132   emitCall(CGF, Loc, OutlinedFn, Args);
11133 }
11134 
11135 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11136   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11137     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11138       HasEmittedDeclareTargetRegion = true;
11139 }
11140 
11141 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11142                                              const VarDecl *NativeParam,
11143                                              const VarDecl *TargetParam) const {
11144   return CGF.GetAddrOfLocalVar(NativeParam);
11145 }
11146 
11147 namespace {
11148 /// Cleanup action for allocate support.
11149 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11150 public:
11151   static const int CleanupArgs = 3;
11152 
11153 private:
11154   llvm::FunctionCallee RTLFn;
11155   llvm::Value *Args[CleanupArgs];
11156 
11157 public:
11158   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11159                        ArrayRef<llvm::Value *> CallArgs)
11160       : RTLFn(RTLFn) {
11161     assert(CallArgs.size() == CleanupArgs &&
11162            "Size of arguments does not match.");
11163     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11164   }
11165   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11166     if (!CGF.HaveInsertPoint())
11167       return;
11168     CGF.EmitRuntimeCall(RTLFn, Args);
11169   }
11170 };
11171 } // namespace
11172 
11173 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11174                                                    const VarDecl *VD) {
11175   if (!VD)
11176     return Address::invalid();
11177   const VarDecl *CVD = VD->getCanonicalDecl();
11178   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
11179     return Address::invalid();
11180   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11181   // Use the default allocation.
11182   if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
11183        AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
11184       !AA->getAllocator())
11185     return Address::invalid();
11186   llvm::Value *Size;
11187   CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11188   if (CVD->getType()->isVariablyModifiedType()) {
11189     Size = CGF.getTypeSize(CVD->getType());
11190     // Align the size: ((size + align - 1) / align) * align
11191     Size = CGF.Builder.CreateNUWAdd(
11192         Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11193     Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11194     Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11195   } else {
11196     CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11197     Size = CGM.getSize(Sz.alignTo(Align));
11198   }
11199   llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11200   assert(AA->getAllocator() &&
11201          "Expected allocator expression for non-default allocator.");
11202   llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
11203   // According to the standard, the original allocator type is a enum (integer).
11204   // Convert to pointer type, if required.
11205   if (Allocator->getType()->isIntegerTy())
11206     Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
11207   else if (Allocator->getType()->isPointerTy())
11208     Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
11209                                                                 CGM.VoidPtrTy);
11210   llvm::Value *Args[] = {ThreadID, Size, Allocator};
11211 
11212   llvm::Value *Addr =
11213       CGF.EmitRuntimeCall(llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
11214                               CGM.getModule(), OMPRTL___kmpc_alloc),
11215                           Args, getName({CVD->getName(), ".void.addr"}));
11216   llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
11217                                                               Allocator};
11218   llvm::FunctionCallee FiniRTLFn =
11219       llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(CGM.getModule(),
11220                                                         OMPRTL___kmpc_free);
11221 
11222   CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11223                                                 llvm::makeArrayRef(FiniArgs));
11224   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11225       Addr,
11226       CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
11227       getName({CVD->getName(), ".addr"}));
11228   return Address(Addr, Align);
11229 }
11230 
11231 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11232     CodeGenModule &CGM, const OMPLoopDirective &S)
11233     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11234   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11235   if (!NeedToPush)
11236     return;
11237   NontemporalDeclsSet &DS =
11238       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11239   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11240     for (const Stmt *Ref : C->private_refs()) {
11241       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11242       const ValueDecl *VD;
11243       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11244         VD = DRE->getDecl();
11245       } else {
11246         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11247         assert((ME->isImplicitCXXThis() ||
11248                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11249                "Expected member of current class.");
11250         VD = ME->getMemberDecl();
11251       }
11252       DS.insert(VD);
11253     }
11254   }
11255 }
11256 
11257 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11258   if (!NeedToPush)
11259     return;
11260   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11261 }
11262 
11263 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11264   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11265 
11266   return llvm::any_of(
11267       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11268       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11269 }
11270 
11271 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11272     const OMPExecutableDirective &S,
11273     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11274     const {
11275   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11276   // Vars in target/task regions must be excluded completely.
11277   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11278       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11279     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11280     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11281     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11282     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11283       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11284         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11285     }
11286   }
11287   // Exclude vars in private clauses.
11288   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11289     for (const Expr *Ref : C->varlists()) {
11290       if (!Ref->getType()->isScalarType())
11291         continue;
11292       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11293       if (!DRE)
11294         continue;
11295       NeedToCheckForLPCs.insert(DRE->getDecl());
11296     }
11297   }
11298   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11299     for (const Expr *Ref : C->varlists()) {
11300       if (!Ref->getType()->isScalarType())
11301         continue;
11302       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11303       if (!DRE)
11304         continue;
11305       NeedToCheckForLPCs.insert(DRE->getDecl());
11306     }
11307   }
11308   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11309     for (const Expr *Ref : C->varlists()) {
11310       if (!Ref->getType()->isScalarType())
11311         continue;
11312       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11313       if (!DRE)
11314         continue;
11315       NeedToCheckForLPCs.insert(DRE->getDecl());
11316     }
11317   }
11318   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11319     for (const Expr *Ref : C->varlists()) {
11320       if (!Ref->getType()->isScalarType())
11321         continue;
11322       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11323       if (!DRE)
11324         continue;
11325       NeedToCheckForLPCs.insert(DRE->getDecl());
11326     }
11327   }
11328   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11329     for (const Expr *Ref : C->varlists()) {
11330       if (!Ref->getType()->isScalarType())
11331         continue;
11332       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11333       if (!DRE)
11334         continue;
11335       NeedToCheckForLPCs.insert(DRE->getDecl());
11336     }
11337   }
11338   for (const Decl *VD : NeedToCheckForLPCs) {
11339     for (const LastprivateConditionalData &Data :
11340          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11341       if (Data.DeclToUniqueName.count(VD) > 0) {
11342         if (!Data.Disabled)
11343           NeedToAddForLPCsAsDisabled.insert(VD);
11344         break;
11345       }
11346     }
11347   }
11348 }
11349 
11350 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11351     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11352     : CGM(CGF.CGM),
11353       Action((CGM.getLangOpts().OpenMP >= 50 &&
11354               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11355                            [](const OMPLastprivateClause *C) {
11356                              return C->getKind() ==
11357                                     OMPC_LASTPRIVATE_conditional;
11358                            }))
11359                  ? ActionToDo::PushAsLastprivateConditional
11360                  : ActionToDo::DoNotPush) {
11361   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11362   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11363     return;
11364   assert(Action == ActionToDo::PushAsLastprivateConditional &&
11365          "Expected a push action.");
11366   LastprivateConditionalData &Data =
11367       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11368   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11369     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11370       continue;
11371 
11372     for (const Expr *Ref : C->varlists()) {
11373       Data.DeclToUniqueName.insert(std::make_pair(
11374           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11375           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11376     }
11377   }
11378   Data.IVLVal = IVLVal;
11379   Data.Fn = CGF.CurFn;
11380 }
11381 
11382 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11383     CodeGenFunction &CGF, const OMPExecutableDirective &S)
11384     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11385   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11386   if (CGM.getLangOpts().OpenMP < 50)
11387     return;
11388   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11389   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11390   if (!NeedToAddForLPCsAsDisabled.empty()) {
11391     Action = ActionToDo::DisableLastprivateConditional;
11392     LastprivateConditionalData &Data =
11393         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11394     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11395       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11396     Data.Fn = CGF.CurFn;
11397     Data.Disabled = true;
11398   }
11399 }
11400 
/// Factory returning a LastprivateConditionalRAII built with the two-argument
/// (CGF, S) constructor, i.e. the one that may push a "disabled" entry onto
/// the lastprivate conditional stack rather than a regular one.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
11406 
11407 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11408   if (CGM.getLangOpts().OpenMP < 50)
11409     return;
11410   if (Action == ActionToDo::DisableLastprivateConditional) {
11411     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11412            "Expected list of disabled private vars.");
11413     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11414   }
11415   if (Action == ActionToDo::PushAsLastprivateConditional) {
11416     assert(
11417         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11418         "Expected list of lastprivate conditional vars.");
11419     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11420   }
11421 }
11422 
11423 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11424                                                         const VarDecl *VD) {
11425   ASTContext &C = CGM.getContext();
11426   auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
11427   if (I == LastprivateConditionalToTypes.end())
11428     I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11429   QualType NewType;
11430   const FieldDecl *VDField;
11431   const FieldDecl *FiredField;
11432   LValue BaseLVal;
11433   auto VI = I->getSecond().find(VD);
11434   if (VI == I->getSecond().end()) {
11435     RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
11436     RD->startDefinition();
11437     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11438     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11439     RD->completeDefinition();
11440     NewType = C.getRecordType(RD);
11441     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11442     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11443     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11444   } else {
11445     NewType = std::get<0>(VI->getSecond());
11446     VDField = std::get<1>(VI->getSecond());
11447     FiredField = std::get<2>(VI->getSecond());
11448     BaseLVal = std::get<3>(VI->getSecond());
11449   }
11450   LValue FiredLVal =
11451       CGF.EmitLValueForField(BaseLVal, FiredField);
11452   CGF.EmitStoreOfScalar(
11453       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11454       FiredLVal);
11455   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
11456 }
11457 
11458 namespace {
11459 /// Checks if the lastprivate conditional variable is referenced in LHS.
11460 class LastprivateConditionalRefChecker final
11461     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11462   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11463   const Expr *FoundE = nullptr;
11464   const Decl *FoundD = nullptr;
11465   StringRef UniqueDeclName;
11466   LValue IVLVal;
11467   llvm::Function *FoundFn = nullptr;
11468   SourceLocation Loc;
11469 
11470 public:
11471   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11472     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11473          llvm::reverse(LPM)) {
11474       auto It = D.DeclToUniqueName.find(E->getDecl());
11475       if (It == D.DeclToUniqueName.end())
11476         continue;
11477       if (D.Disabled)
11478         return false;
11479       FoundE = E;
11480       FoundD = E->getDecl()->getCanonicalDecl();
11481       UniqueDeclName = It->second;
11482       IVLVal = D.IVLVal;
11483       FoundFn = D.Fn;
11484       break;
11485     }
11486     return FoundE == E;
11487   }
11488   bool VisitMemberExpr(const MemberExpr *E) {
11489     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11490       return false;
11491     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11492          llvm::reverse(LPM)) {
11493       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11494       if (It == D.DeclToUniqueName.end())
11495         continue;
11496       if (D.Disabled)
11497         return false;
11498       FoundE = E;
11499       FoundD = E->getMemberDecl()->getCanonicalDecl();
11500       UniqueDeclName = It->second;
11501       IVLVal = D.IVLVal;
11502       FoundFn = D.Fn;
11503       break;
11504     }
11505     return FoundE == E;
11506   }
11507   bool VisitStmt(const Stmt *S) {
11508     for (const Stmt *Child : S->children()) {
11509       if (!Child)
11510         continue;
11511       if (const auto *E = dyn_cast<Expr>(Child))
11512         if (!E->isGLValue())
11513           continue;
11514       if (Visit(Child))
11515         return true;
11516     }
11517     return false;
11518   }
11519   explicit LastprivateConditionalRefChecker(
11520       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11521       : LPM(LPM) {}
11522   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11523   getFoundData() const {
11524     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11525   }
11526 };
11527 } // namespace
11528 
/// Emits the code that records the latest value of a lastprivate conditional
/// variable.
///
/// Two internal variables are created or reused: one named from
/// \p UniqueDeclName plus "iv" holding the last recorded loop counter, and one
/// named \p UniqueDeclName holding the last recorded value. The emitted code
/// copies the value at \p LVal into the latter when the stored counter is
/// less than or equal to the counter loaded from \p IVLVal. Scalar and
/// complex values are handled; aggregates are unreachable.
///
/// The update is wrapped in a critical region named \p UniqueDeclName, except
/// when OpenMPSimd is set, where the body is emitted directly.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  // The counter is loaded here, before the region body below is emitted.
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    // The comparison is signed or unsigned depending on the counter type.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    // NOTE(review): the object returned by CreateEmpty is discarded right
    // away and the branch has already been emitted above - confirm this
    // statement still has the intended effect.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
11615 
11616 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11617                                                          const Expr *LHS) {
11618   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11619     return;
11620   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11621   if (!Checker.Visit(LHS))
11622     return;
11623   const Expr *FoundE;
11624   const Decl *FoundD;
11625   StringRef UniqueDeclName;
11626   LValue IVLVal;
11627   llvm::Function *FoundFn;
11628   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11629       Checker.getFoundData();
11630   if (FoundFn != CGF.CurFn) {
11631     // Special codegen for inner parallel regions.
11632     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11633     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11634     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11635            "Lastprivate conditional is not found in outer region.");
11636     QualType StructTy = std::get<0>(It->getSecond());
11637     const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
11638     LValue PrivLVal = CGF.EmitLValue(FoundE);
11639     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11640         PrivLVal.getAddress(CGF),
11641         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
11642     LValue BaseLVal =
11643         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11644     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11645     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11646                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11647                         FiredLVal, llvm::AtomicOrdering::Unordered,
11648                         /*IsVolatile=*/true, /*isInit=*/false);
11649     return;
11650   }
11651 
11652   // Private address of the lastprivate conditional in the current context.
11653   // priv_a
11654   LValue LVal = CGF.EmitLValue(FoundE);
11655   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11656                                    FoundE->getExprLoc());
11657 }
11658 
11659 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11660     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11661     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11662   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11663     return;
11664   auto Range = llvm::reverse(LastprivateConditionalStack);
11665   auto It = llvm::find_if(
11666       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11667   if (It == Range.end() || It->Fn != CGF.CurFn)
11668     return;
11669   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11670   assert(LPCI != LastprivateConditionalToTypes.end() &&
11671          "Lastprivates must be registered already.");
11672   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11673   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11674   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11675   for (const auto &Pair : It->DeclToUniqueName) {
11676     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11677     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
11678       continue;
11679     auto I = LPCI->getSecond().find(Pair.first);
11680     assert(I != LPCI->getSecond().end() &&
11681            "Lastprivate must be rehistered already.");
11682     // bool Cmp = priv_a.Fired != 0;
11683     LValue BaseLVal = std::get<3>(I->getSecond());
11684     LValue FiredLVal =
11685         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11686     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11687     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11688     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11689     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11690     // if (Cmp) {
11691     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11692     CGF.EmitBlock(ThenBB);
11693     Address Addr = CGF.GetAddrOfLocalVar(VD);
11694     LValue LVal;
11695     if (VD->getType()->isReferenceType())
11696       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11697                                            AlignmentSource::Decl);
11698     else
11699       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11700                                 AlignmentSource::Decl);
11701     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11702                                      D.getBeginLoc());
11703     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11704     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11705     // }
11706   }
11707 }
11708 
11709 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11710     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11711     SourceLocation Loc) {
11712   if (CGF.getLangOpts().OpenMP < 50)
11713     return;
11714   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11715   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11716          "Unknown lastprivate conditional variable.");
11717   StringRef UniqueName = It->second;
11718   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11719   // The variable was not updated in the region - exit.
11720   if (!GV)
11721     return;
11722   LValue LPLVal = CGF.MakeAddrLValue(
11723       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11724   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11725   CGF.EmitStoreOfScalar(Res, PrivLVal);
11726 }
11727 
/// SIMD-only runtime: outlining a 'parallel' region is not supported. The
/// body is an unconditional llvm_unreachable; all parameters are ignored.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11733 
/// SIMD-only runtime: outlining a 'teams' region is not supported. The body
/// is an unconditional llvm_unreachable; all parameters are ignored.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11739 
/// SIMD-only runtime: outlining a task region is not supported. The body is
/// an unconditional llvm_unreachable; all parameters are ignored and
/// \p NumberOfParts is never written.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11747 
/// SIMD-only runtime: emitting a call to a parallel outlined function is not
/// supported. Unconditional llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11755 
/// SIMD-only runtime: critical regions are not supported. Unconditional
/// llvm_unreachable; all parameters are ignored. Note that
/// CGOpenMPRuntime::emitLastprivateConditionalUpdate avoids calling
/// emitCriticalRegion when the OpenMPSimd language option is set.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11762 
/// SIMD-only runtime: 'master' regions are not supported. Unconditional
/// llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11768 
/// SIMD-only runtime: taskyield calls are not supported. Unconditional
/// llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11773 
/// SIMD-only runtime: 'taskgroup' regions are not supported. Unconditional
/// llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11779 
/// SIMD-only runtime: 'single' regions are not supported. Unconditional
/// llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11787 
/// SIMD-only runtime: 'ordered' regions are not supported. Unconditional
/// llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11794 
/// SIMD-only runtime: barrier calls are not supported. Unconditional
/// llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11802 
/// SIMD-only runtime: dispatch-init for worksharing loops is not supported.
/// Unconditional llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11809 
/// SIMD-only runtime: static-init for worksharing loops is not supported.
/// Unconditional llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11815 
/// SIMD-only runtime: static-init for 'distribute' is not supported.
/// Unconditional llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11821 
/// SIMD-only runtime: ordered-iteration-end notification is not supported.
/// Unconditional llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11828 
/// SIMD-only runtime: static-finish for worksharing loops is not supported.
/// Unconditional llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11834 
/// SIMD-only runtime: fetching the next loop chunk is not supported.
/// Unconditional llvm_unreachable; all parameters are ignored and no value
/// is ever returned.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11842 
/// SIMD-only runtime: the 'num_threads' clause is not supported.
/// Unconditional llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11848 
/// SIMD-only runtime: the 'proc_bind' clause is not supported.
/// Unconditional llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11854 
/// SIMD-only runtime: threadprivate address lookup is not supported.
/// Unconditional llvm_unreachable; all parameters are ignored and no address
/// is ever returned.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11861 
/// SIMD-only runtime: emitting a threadprivate variable definition is not
/// supported. Unconditional llvm_unreachable; all parameters are ignored.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11867 
/// SIMD-only runtime: artificial threadprivate variables are not supported.
/// Unconditional llvm_unreachable; all parameters are ignored.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11872 
/// SIMD-only runtime: 'flush' is not supported. Unconditional
/// llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11879 
/// SIMD-only runtime: task calls are not supported. Unconditional
/// llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11888 
/// SIMD-only runtime: taskloop calls are not supported. Unconditional
/// llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11895 
/// SIMD-only runtime reduction. Asserts that \p Options.SimpleReduction is
/// set and then forwards every argument unchanged to
/// CGOpenMPRuntime::emitReduction.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
11904 
/// SIMD-only runtime: task reduction initialization is not supported.
/// Unconditional llvm_unreachable; all parameters are ignored.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11910 
/// SIMD-only runtime: task reduction finalization is not supported.
/// Unconditional llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11916 
/// SIMD-only runtime: task reduction fixups are not supported.
/// Unconditional llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11923 
/// SIMD-only runtime: task reduction item lookup is not supported.
/// Unconditional llvm_unreachable; all parameters are ignored and no address
/// is ever returned.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11930 
/// SIMD-only runtime: taskwait calls are not supported. Unconditional
/// llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11935 
/// SIMD-only runtime: cancellation points are not supported. Unconditional
/// llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11941 
/// SIMD-only runtime: 'cancel' is not supported. Unconditional
/// llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11947 
/// SIMD-only runtime: outlining a 'target' region is not supported.
/// Unconditional llvm_unreachable; all parameters are ignored and the
/// \p OutlinedFn / \p OutlinedFnID outputs are never written.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11954 
/// SIMD-only runtime: target calls are not supported. Unconditional
/// llvm_unreachable; all parameters (including the \p SizeEmitter callback)
/// are ignored.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11964 
/// SIMD-only runtime: emitting target functions is not supported.
/// Unconditional llvm_unreachable; \p GD is ignored.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11968 
/// SIMD-only runtime: emitting target global variables is not supported.
/// Unconditional llvm_unreachable; \p GD is ignored.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11972 
/// SIMD-only runtime: always returns false without inspecting \p GD. Unlike
/// most other functions of this class, it does not hit llvm_unreachable.
/// NOTE(review): presumably 'false' tells the caller the declaration was not
/// handled as a target global - confirm against the base-class contract.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
11976 
/// SIMD-only runtime: teams calls are not supported. Unconditional
/// llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11984 
/// SIMD-only runtime: the 'num_teams' / 'thread_limit' clauses are not
/// supported. Unconditional llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11991 
/// SIMD-only runtime: target data calls are not supported. Unconditional
/// llvm_unreachable; all parameters are ignored and \p Info is never
/// modified.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11997 
/// SIMD-only runtime: standalone target data calls are not supported.
/// Unconditional llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12003 
/// SIMD-only runtime: doacross initialization is not supported.
/// Unconditional llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12009 
/// SIMD-only runtime: doacross 'ordered' handling is not supported.
/// Unconditional llvm_unreachable; all parameters are ignored.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12014 
/// SIMD-only runtime: parameter translation is not supported. Unconditional
/// llvm_unreachable; all parameters are ignored and nothing is returned.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12020 
/// SIMD-only runtime: parameter address lookup is not supported.
/// Unconditional llvm_unreachable; all parameters are ignored and no address
/// is ever returned.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12027