1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 
42 using namespace clang;
43 using namespace CodeGen;
44 using namespace llvm::omp;
45 
46 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info backed by a captured statement (outlined regions).
  /// \param CS Captured statement the region emits code for.
  /// \param RegionKind Kind of the OpenMP region being emitted.
  /// \param CodeGen Callback that emits the actual body of the region.
  /// \param Kind OpenMP directive that introduced this region.
  /// \param HasCancel true if the construct may contain a 'cancel' directive.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info without a captured statement (used for inlined
  /// regions, which reuse the enclosing function's capture info).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task switching point; no-op by default, overridden for
  /// untied-task regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Return the kind of this OpenMP region.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Return the OpenMP directive that created this region.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// true if the region may contain a 'cancel' directive.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: any capture info tagged CR_OpenMP is an OpenMP region.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  /// Callback invoked to emit the region body.
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
108 
109 /// API for captured statement code generation in OpenMP constructs.
110 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
111 public:
112   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
113                              const RegionCodeGenTy &CodeGen,
114                              OpenMPDirectiveKind Kind, bool HasCancel,
115                              StringRef HelperName)
116       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
117                            HasCancel),
118         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
119     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
120   }
121 
122   /// Get a variable or parameter for storing global thread id
123   /// inside OpenMP construct.
124   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
125 
126   /// Get the name of the capture helper.
127   StringRef getHelperName() const override { return HelperName; }
128 
129   static bool classof(const CGCapturedStmtInfo *Info) {
130     return CGOpenMPRegionInfo::classof(Info) &&
131            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
132                ParallelOutlinedRegion;
133   }
134 
135 private:
136   /// A variable or parameter storing global thread id for OpenMP
137   /// constructs.
138   const VarDecl *ThreadIDVar;
139   StringRef HelperName;
140 };
141 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the dispatch machinery needed by 'untied'
  /// tasks: a switch over the task's part id whose cases are the task's
  /// scheduling points, so a re-scheduled task resumes where it left off.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// true if the task is untied (note: the constructor takes 'Tied').
    bool Untied;
    /// Variable referencing the part id of the current untied task.
    const VarDecl *PartIDVar;
    /// Extra code emitted at every task switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; cases are appended as switching points are
    /// emitted.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Default destination exits the task; case 0 resumes at the start of
        // the task body.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task switching point: store the next part id, run the extra
    /// untied codegen, branch out of the task, and register the continuation
    /// block as a new case of the part-id switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // Record the case index at which the task resumes on its next
        // scheduling.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of switching points emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Delegate switching-point emission to the untied-task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// LLVM-style RTTI support.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
230 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are forwarded to the enclosing (outer) region
/// info when one exists; otherwise the inlined region has no captures of
/// its own.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI Capture info of the enclosing scope; restored by the
  /// caller when this inlined region ends (see InlinedOpenMPRegionRAII).
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region,no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  /// Note: deliberately queries getOldCSI() (shadowing the member), so any
  /// enclosing capture info -- not only an OpenMP region -- can provide the
  /// helper name.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Return the capture info that was active before this inlined region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  /// LLVM-style RTTI support.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if the enclosing
  /// capture info is not an OpenMP region.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
313 
314 /// API for captured statement code generation in OpenMP target
315 /// constructs. For this captures, implicit parameters are used instead of the
316 /// captured fields. The name of the target region has to be unique in a given
317 /// application so it is provided by the client, because only the client has
318 /// the information to generate that.
319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
320 public:
321   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
322                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
323       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
324                            /*HasCancel=*/false),
325         HelperName(HelperName) {}
326 
327   /// This is unused for target regions because each starts executing
328   /// with a single thread.
329   const VarDecl *getThreadIDVariable() const override { return nullptr; }
330 
331   /// Get the name of the capture helper.
332   StringRef getHelperName() const override { return HelperName; }
333 
334   static bool classof(const CGCapturedStmtInfo *Info) {
335     return CGOpenMPRegionInfo::classof(Info) &&
336            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
337   }
338 
339 private:
340   StringRef HelperName;
341 };
342 
/// Placeholder codegen callback for region infos that must never emit a
/// body (expression-only captures).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local to the function; only
      // globals need privatization.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  // This info is never identified via RTTI; it is installed and removed
  // explicitly by its creator.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
405 
/// RAII for emitting code of OpenMP constructs. Installs a
/// CGOpenMPInlinedRegionInfo on the CodeGenFunction for the lifetime of the
/// object and stashes lambda/block capture state so the inlined region does
/// not see it; everything is restored in the destructor.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture map (swapped with CGF's for the region lifetime).
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved lambda 'this' capture field.
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block info.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    // Hide lambda/block capture state from the inlined region.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
442 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerator elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL, mirroring kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
471 
namespace {
// Enable bitwise operators (|, &, ...) on the bitmask enums declared below.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Device IDs with reserved meanings for the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
497 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
538 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h). The 'ordered' variants below have the
/// value of the corresponding unordered variant plus 32.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
570 
571 enum OpenMPRTLFunction {
572   /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
573   /// kmpc_micro microtask, ...);
574   OMPRTL__kmpc_fork_call,
575   /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
576   /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
577   OMPRTL__kmpc_threadprivate_cached,
578   /// Call to void __kmpc_threadprivate_register( ident_t *,
579   /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
580   OMPRTL__kmpc_threadprivate_register,
581   // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
582   OMPRTL__kmpc_global_thread_num,
583   // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
584   // kmp_critical_name *crit);
585   OMPRTL__kmpc_critical,
586   // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
587   // global_tid, kmp_critical_name *crit, uintptr_t hint);
588   OMPRTL__kmpc_critical_with_hint,
589   // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
590   // kmp_critical_name *crit);
591   OMPRTL__kmpc_end_critical,
592   // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
593   // global_tid);
594   OMPRTL__kmpc_cancel_barrier,
595   // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
596   OMPRTL__kmpc_barrier,
597   // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
598   OMPRTL__kmpc_for_static_fini,
599   // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
600   // global_tid);
601   OMPRTL__kmpc_serialized_parallel,
602   // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
603   // global_tid);
604   OMPRTL__kmpc_end_serialized_parallel,
605   // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
606   // kmp_int32 num_threads);
607   OMPRTL__kmpc_push_num_threads,
608   // Call to void __kmpc_flush(ident_t *loc);
609   OMPRTL__kmpc_flush,
610   // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
611   OMPRTL__kmpc_master,
612   // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
613   OMPRTL__kmpc_end_master,
614   // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
615   // int end_part);
616   OMPRTL__kmpc_omp_taskyield,
617   // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
618   OMPRTL__kmpc_single,
619   // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
620   OMPRTL__kmpc_end_single,
621   // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
622   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
623   // kmp_routine_entry_t *task_entry);
624   OMPRTL__kmpc_omp_task_alloc,
625   // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
626   // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
627   // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
628   // kmp_int64 device_id);
629   OMPRTL__kmpc_omp_target_task_alloc,
630   // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
631   // new_task);
632   OMPRTL__kmpc_omp_task,
633   // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
634   // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
635   // kmp_int32 didit);
636   OMPRTL__kmpc_copyprivate,
637   // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
638   // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
639   // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
640   OMPRTL__kmpc_reduce,
641   // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
642   // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
643   // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
644   // *lck);
645   OMPRTL__kmpc_reduce_nowait,
646   // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
647   // kmp_critical_name *lck);
648   OMPRTL__kmpc_end_reduce,
649   // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
650   // kmp_critical_name *lck);
651   OMPRTL__kmpc_end_reduce_nowait,
652   // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
653   // kmp_task_t * new_task);
654   OMPRTL__kmpc_omp_task_begin_if0,
655   // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
656   // kmp_task_t * new_task);
657   OMPRTL__kmpc_omp_task_complete_if0,
658   // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
659   OMPRTL__kmpc_ordered,
660   // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
661   OMPRTL__kmpc_end_ordered,
662   // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
663   // global_tid);
664   OMPRTL__kmpc_omp_taskwait,
665   // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
666   OMPRTL__kmpc_taskgroup,
667   // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
668   OMPRTL__kmpc_end_taskgroup,
669   // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
670   // int proc_bind);
671   OMPRTL__kmpc_push_proc_bind,
672   // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
673   // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
674   // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
675   OMPRTL__kmpc_omp_task_with_deps,
676   // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
677   // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
678   // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
679   OMPRTL__kmpc_omp_wait_deps,
680   // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
681   // global_tid, kmp_int32 cncl_kind);
682   OMPRTL__kmpc_cancellationpoint,
683   // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
684   // kmp_int32 cncl_kind);
685   OMPRTL__kmpc_cancel,
686   // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
687   // kmp_int32 num_teams, kmp_int32 thread_limit);
688   OMPRTL__kmpc_push_num_teams,
689   // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
690   // microtask, ...);
691   OMPRTL__kmpc_fork_teams,
692   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
693   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
694   // sched, kmp_uint64 grainsize, void *task_dup);
695   OMPRTL__kmpc_taskloop,
696   // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
697   // num_dims, struct kmp_dim *dims);
698   OMPRTL__kmpc_doacross_init,
699   // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
700   OMPRTL__kmpc_doacross_fini,
701   // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
702   // *vec);
703   OMPRTL__kmpc_doacross_post,
704   // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
705   // *vec);
706   OMPRTL__kmpc_doacross_wait,
707   // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
708   // *data);
709   OMPRTL__kmpc_task_reduction_init,
710   // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
711   // *d);
712   OMPRTL__kmpc_task_reduction_get_th_data,
713   // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
714   OMPRTL__kmpc_alloc,
715   // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
716   OMPRTL__kmpc_free,
717 
718   //
719   // Offloading related calls
720   //
721   // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
722   // size);
723   OMPRTL__kmpc_push_target_tripcount,
724   // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
725   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
726   // *arg_types);
727   OMPRTL__tgt_target,
728   // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
729   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
730   // *arg_types);
731   OMPRTL__tgt_target_nowait,
732   // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
733   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
734   // *arg_types, int32_t num_teams, int32_t thread_limit);
735   OMPRTL__tgt_target_teams,
736   // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
737   // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
738   // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
739   OMPRTL__tgt_target_teams_nowait,
740   // Call to void __tgt_register_requires(int64_t flags);
741   OMPRTL__tgt_register_requires,
742   // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
743   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
744   OMPRTL__tgt_target_data_begin,
745   // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
746   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
747   // *arg_types);
748   OMPRTL__tgt_target_data_begin_nowait,
749   // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
750   // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
751   OMPRTL__tgt_target_data_end,
752   // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
753   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
754   // *arg_types);
755   OMPRTL__tgt_target_data_end_nowait,
756   // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
757   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
758   OMPRTL__tgt_target_data_update,
759   // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
760   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
761   // *arg_types);
762   OMPRTL__tgt_target_data_update_nowait,
763   // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
764   OMPRTL__tgt_mapper_num_components,
765   // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
766   // *base, void *begin, int64_t size, int64_t type);
767   OMPRTL__tgt_push_mapper_component,
768 };
769 
770 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
771 /// region.
772 class CleanupTy final : public EHScopeStack::Cleanup {
773   PrePostActionTy *Action;
774 
775 public:
776   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
777   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
778     if (!CGF.HaveInsertPoint())
779       return;
780     Action->Exit(CGF);
781   }
782 };
783 
784 } // anonymous namespace
785 
786 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
787   CodeGenFunction::RunCleanupsScope Scope(CGF);
788   if (PrePostAction) {
789     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
790     Callback(CodeGen, CGF, *PrePostAction);
791   } else {
792     PrePostActionTy Action;
793     Callback(CodeGen, CGF, Action);
794   }
795 }
796 
797 /// Check if the combiner is a call to UDR combiner and if it is so return the
798 /// UDR decl used for reduction.
799 static const OMPDeclareReductionDecl *
800 getReductionInit(const Expr *ReductionOp) {
801   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
802     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
803       if (const auto *DRE =
804               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
805         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
806           return DRD;
807   return nullptr;
808 }
809 
/// Initialize a reduction private copy using the 'initializer' clause of a
/// user-defined reduction, or with a zero/null value when the UDR has no
/// explicit initializer.
///
/// \param DRD      The user-defined reduction declaration.
/// \param InitOp   Initializer expression: a call whose callee is an opaque
///                 value and whose two arguments reference the priv/orig vars.
/// \param Private  Address of the private copy being initialized.
/// \param Original Address of the original (shared) item.
/// \param Ty       Type of the reduction item.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Explicit initializer: emit a call to the generated initializer function
    // with the call's variables remapped to the private/original addresses.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    // The two arguments are address-of expressions; dig out the variables
    // they reference.
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Map the first variable to the private copy and the second to the
    // original item for the duration of the call emission.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Substitute the opaque callee with the emitted initializer function
    // (second element of the UDR pair) and emit the call.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No explicit initializer: materialize a private-linkage global holding
    // the zero value of Ty and copy it into the private location.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the zero value according to the evaluation kind of Ty.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    // Wrap the loaded value in an opaque expression so it can be stored via
    // the generic expression-to-memory path.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
861 
/// Emit element-by-element initialization of an array reduction item.
/// \param DestAddr Address of the (private) array being initialized.
/// \param Type Array type of the item.
/// \param EmitDeclareReductionInit True if the UDR initializer path must be
///        used for each element, false to use the plain init expression.
/// \param Init Initial expression for each element.
/// \param DRD User-defined reduction declaration, or null.
/// \param SrcAddr Address of the original (shared) array; required when
///        \p DRD is non-null.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  // The source pointer is only tracked when a UDR needs access to the
  // original element (omp_orig).
  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for a zero-length array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope the per-element cleanups so temporaries die each iteration.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR name says "dest.element" although this bumps the
    // source pointer; cosmetic only, the PHI wiring is correct.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back-edge comes from whatever block the body ended in, which the
  // element emission may have changed.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
950 
951 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
952   return CGF.EmitOMPSharedLValue(E);
953 }
954 
955 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
956                                             const Expr *E) {
957   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
958     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
959   return LValue();
960 }
961 
962 void ReductionCodeGen::emitAggregateInitialization(
963     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
964     const OMPDeclareReductionDecl *DRD) {
965   // Emit VarDecl with copy init for arrays.
966   // Get the address of the original variable captured in current
967   // captured region.
968   const auto *PrivateVD =
969       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
970   bool EmitDeclareReductionInit =
971       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
972   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
973                        EmitDeclareReductionInit,
974                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
975                                                 : PrivateVD->getInit(),
976                        DRD, SharedLVal.getAddress(CGF));
977 }
978 
979 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
980                                    ArrayRef<const Expr *> Privates,
981                                    ArrayRef<const Expr *> ReductionOps) {
982   ClausesData.reserve(Shareds.size());
983   SharedAddresses.reserve(Shareds.size());
984   Sizes.reserve(Shareds.size());
985   BaseDecls.reserve(Shareds.size());
986   auto IPriv = Privates.begin();
987   auto IRed = ReductionOps.begin();
988   for (const Expr *Ref : Shareds) {
989     ClausesData.emplace_back(Ref, *IPriv, *IRed);
990     std::advance(IPriv, 1);
991     std::advance(IRed, 1);
992   }
993 }
994 
995 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
996   assert(SharedAddresses.size() == N &&
997          "Number of generated lvalues must be exactly N.");
998   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
999   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
1000   SharedAddresses.emplace_back(First, Second);
1001 }
1002 
/// Compute and record the size (in chars and in elements) of reduction item
/// \p N, emitting runtime size computations for variably-modified types.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: the size in chars is known; no element count needed.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  // Element size is derived from the pointee type of the shared address.
  auto *ElemType = cast<llvm::PointerType>(
                       SharedAddresses[N].first.getPointer(CGF)->getType())
                       ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1; size in chars = count * sizeof(elem).
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF),
                                     SharedAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole VLA: size in chars is known from the type; derive element count.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA's size expression to the computed element count so the
  // variably-modified private type can be emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1040 
1041 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
1042                                          llvm::Value *Size) {
1043   const auto *PrivateVD =
1044       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1045   QualType PrivateType = PrivateVD->getType();
1046   if (!PrivateType->isVariablyModifiedType()) {
1047     assert(!Size && !Sizes[N].second &&
1048            "Size should be nullptr for non-variably modified reduction "
1049            "items.");
1050     return;
1051   }
1052   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1053       CGF,
1054       cast<OpaqueValueExpr>(
1055           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1056       RValue::get(Size));
1057   CGF.EmitVariablyModifiedType(PrivateType);
1058 }
1059 
/// Initialize the private copy of reduction item \p N.
///
/// \param PrivateAddr Address of the private copy.
/// \param SharedLVal  LValue of the original (shared) item.
/// \param DefaultInit Callback that performs default initialization; if it
///        returns false the item's own initializer (if non-trivial) is used.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Recast both addresses to the memory representation of their source-level
  // types before initializing.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays are initialized element by element.
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a UDR initializer (or no own initializer).
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private copy's own non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
1090 
1091 bool ReductionCodeGen::needCleanups(unsigned N) {
1092   const auto *PrivateVD =
1093       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1094   QualType PrivateType = PrivateVD->getType();
1095   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1096   return DTorKind != QualType::DK_none;
1097 }
1098 
1099 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1100                                     Address PrivateAddr) {
1101   const auto *PrivateVD =
1102       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1103   QualType PrivateType = PrivateVD->getType();
1104   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1105   if (needCleanups(N)) {
1106     PrivateAddr = CGF.Builder.CreateElementBitCast(
1107         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1108     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1109   }
1110 }
1111 
/// Follow pointer/reference indirections of \p BaseTy starting from
/// \p BaseLV until the pointee type matches \p ElTy (or no indirection is
/// left), loading through each level; return the resulting lvalue recast to
/// the memory representation of \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference: reinterpret the current address as a reference lvalue and
      // load through it.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Recast the final address to ElTy's memory type, preserving the lvalue's
  // base and TBAA info.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
1131 
/// Wrap \p Addr in a chain of temporaries mirroring the pointer/reference
/// levels between \p BaseTy and \p ElTy, so that loading through the returned
/// address the same number of times as the original base yields \p Addr.
/// With no indirection levels, \p Addr is returned directly (cast to
/// \p BaseLVType at \p BaseLVAlignment).
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  // Allocate one temporary per indirection level, linking each new temporary
  // into the previous one; remember the outermost (MostTopTmp) and the
  // innermost (Tmp).
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast Addr to the innermost temporary's element type (or the base lvalue
  // type when no temporaries were needed).
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted address into the innermost temporary and hand back
    // the outermost one.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
1159 
1160 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1161   const VarDecl *OrigVD = nullptr;
1162   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1163     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1164     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1165       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1166     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1167       Base = TempASE->getBase()->IgnoreParenImpCasts();
1168     DE = cast<DeclRefExpr>(Base);
1169     OrigVD = cast<VarDecl>(DE->getDecl());
1170   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1171     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1172     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1173       Base = TempASE->getBase()->IgnoreParenImpCasts();
1174     DE = cast<DeclRefExpr>(Base);
1175     OrigVD = cast<VarDecl>(DE->getDecl());
1176   }
1177   return OrigVD;
1178 }
1179 
/// Adjust \p PrivateAddr of reduction item \p N so that it points at the base
/// of the private copy corresponding to the original item's base variable
/// (needed for array sections/subscripts, where the reduction item is an
/// interior slice of the base array). Records the base declaration.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    // Offset between the base variable's begin and the shared item's begin.
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    // Apply the same (negative) offset to the private copy so the result
    // points at the private counterpart of the base.
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    // Rebuild the indirection chain of the base variable's type around Ptr.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no adjustment needed.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1205 
1206 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1207   const OMPDeclareReductionDecl *DRD =
1208       getReductionInit(ClausesData[N].ReductionOp);
1209   return DRD && DRD->getInitializer();
1210 }
1211 
1212 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1213   return CGF.EmitLoadOfPointerLValue(
1214       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1215       getThreadIDVariable()->getType()->castAs<PointerType>());
1216 }
1217 
/// Emit the body of an OpenMP region inside a terminate scope, so that an
/// exception escaping the structured block aborts instead of unwinding out.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  // Nothing to do if codegen has no insertion point.
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1230 
1231 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1232     CodeGenFunction &CGF) {
1233   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1234                             getThreadIDVariable()->getType(),
1235                             AlignmentSource::Decl);
1236 }
1237 
1238 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1239                                        QualType FieldTy) {
1240   auto *Field = FieldDecl::Create(
1241       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1242       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1243       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1244   Field->setAccess(AS_public);
1245   DC->addDecl(Field);
1246   return Field;
1247 }
1248 
/// Build the implicit 'ident_t' record used to pass source-location info to
/// the OpenMP runtime, cache its AST/IR types, and load any offload metadata
/// from a host IR file. The field layout must stay in sync with the ident_t
/// struct of the OpenMP runtime library.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  // Critical-section names are arrays of 8 x i32 in the runtime.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}
1274 
/// Try to emit an alias from the function named by \p OldGD to the variant
/// function \p NewGD (from a 'declare variant' directive). Returns true if
/// the alias was created; returns false when the variant has no definition
/// yet, so only a declaration can be emitted for now.
///
/// \param OrigAddr Existing global for the original function, if any; it is
///        replaced by the alias and erased.
/// \param IsForDefinition True if a definition is being requested.
bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD,
                                            const GlobalDecl &OldGD,
                                            llvm::GlobalValue *OrigAddr,
                                            bool IsForDefinition) {
  // Emit at least a definition for the aliasee if the address of the
  // original function is requested.
  if (IsForDefinition || OrigAddr)
    (void)CGM.GetAddrOfGlobal(NewGD);
  StringRef NewMangledName = CGM.getMangledName(NewGD);
  llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName);
  // An alias can only point at a defined aliasee.
  if (Addr && !Addr->isDeclaration()) {
    const auto *D = cast<FunctionDecl>(OldGD.getDecl());
    const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD);
    llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI);

    // The alias takes the linkage of the original function.
    llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD);

    // Create the new alias itself, but don't set a name yet.
    auto *GA =
        llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule());

    if (OrigAddr) {
      assert(OrigAddr->isDeclaration() && "Expected declaration");

      // Take over the original declaration's name and uses, then drop it.
      GA->takeName(OrigAddr);
      OrigAddr->replaceAllUsesWith(
          llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType()));
      OrigAddr->eraseFromParent();
    } else {
      GA->setName(CGM.getMangledName(OldGD));
    }

    // Set attributes which are particular to an alias; this is a
    // specialization of the attributes which may be set on a global function.
    if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() ||
        D->isWeakImported())
      GA->setLinkage(llvm::Function::WeakAnyLinkage);

    CGM.SetCommonAttributes(OldGD, GA);
    return true;
  }
  return false;
}
1320 
1321 void CGOpenMPRuntime::clear() {
1322   InternalVars.clear();
1323   // Clean non-target variable declarations possibly used only in debug info.
1324   for (const auto &Data : EmittedNonTargetVariables) {
1325     if (!Data.getValue().pointsToAliveValue())
1326       continue;
1327     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1328     if (!GV)
1329       continue;
1330     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1331       continue;
1332     GV->eraseFromParent();
1333   }
1334   // Emit aliases for the deferred aliasees.
1335   for (const auto &Pair : DeferredVariantFunction) {
1336     StringRef MangledName = CGM.getMangledName(Pair.second.second);
1337     llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName);
1338     // If not able to emit alias, just emit original declaration.
1339     (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr,
1340                                 /*IsForDefinition=*/false);
1341   }
1342 }
1343 
1344 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1345   SmallString<128> Buffer;
1346   llvm::raw_svector_ostream OS(Buffer);
1347   StringRef Sep = FirstSeparator;
1348   for (StringRef Part : Parts) {
1349     OS << Sep << Part;
1350     Sep = Separator;
1351   }
1352   return std::string(OS.str());
1353 }
1354 
/// Emit the outlined combiner or initializer function for a user-defined
/// reduction.
///
/// \param Ty Type of the reduction item.
/// \param CombinerInitializer The combiner expression, or the initializer
///        expression for call-style initializers (null for direct init).
/// \param In  Variable bound to the first parameter (omp_in / omp_orig).
/// \param Out Variable bound to the second parameter (omp_out / omp_priv).
/// \param IsCombiner True when emitting the combiner, false for the
///        initializer; affects only the generated name and Out's own init.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // These tiny helpers should be inlined into their callers when optimizing.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For direct-init initializers, emit Out's own non-trivial initializer.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  // Emit the combiner expression or the call-style initializer, if any.
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1411 
1412 void CGOpenMPRuntime::emitUserDefinedReduction(
1413     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1414   if (UDRMap.count(D) > 0)
1415     return;
1416   llvm::Function *Combiner = emitCombinerOrInitializer(
1417       CGM, D->getType(), D->getCombiner(),
1418       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1419       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1420       /*IsCombiner=*/true);
1421   llvm::Function *Initializer = nullptr;
1422   if (const Expr *Init = D->getInitializer()) {
1423     Initializer = emitCombinerOrInitializer(
1424         CGM, D->getType(),
1425         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1426                                                                      : nullptr,
1427         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1428         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1429         /*IsCombiner=*/false);
1430   }
1431   UDRMap.try_emplace(D, Combiner, Initializer);
1432   if (CGF) {
1433     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1434     Decls.second.push_back(D);
1435   }
1436 }
1437 
1438 std::pair<llvm::Function *, llvm::Function *>
1439 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1440   auto I = UDRMap.find(D);
1441   if (I != UDRMap.end())
1442     return I->second;
1443   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1444   return UDRMap.lookup(D);
1445 }
1446 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
//
// While this object is alive a FinalizationInfo (with a cancellation
// finalization callback) stays pushed on the OpenMPIRBuilder's stack; the
// destructor pops it again. The callback captures \p CGF by reference, so an
// instance must not outlive the CodeGenFunction it was constructed with.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    // No OpenMPIRBuilder in use - nothing to push (and nothing to pop later).
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Temporarily move the builder to IP; the guard restores it afterwards.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    // Pop only if the constructor actually pushed (i.e. a builder existed).
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1492 
1493 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1494     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1495     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1496     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1497   assert(ThreadIDVar->getType()->isPointerType() &&
1498          "thread id variable must be of type kmp_int32 *");
1499   CodeGenFunction CGF(CGM, true);
1500   bool HasCancel = false;
1501   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1502     HasCancel = OPD->hasCancel();
1503   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1504     HasCancel = OPSD->hasCancel();
1505   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1506     HasCancel = OPFD->hasCancel();
1507   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1508     HasCancel = OPFD->hasCancel();
1509   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1510     HasCancel = OPFD->hasCancel();
1511   else if (const auto *OPFD =
1512                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1513     HasCancel = OPFD->hasCancel();
1514   else if (const auto *OPFD =
1515                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1516     HasCancel = OPFD->hasCancel();
1517 
1518   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1519   //       parallel region to make cancellation barriers work properly.
1520   llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
1521   PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
1522   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1523                                     HasCancel, OutlinedHelperName);
1524   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1525   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1526 }
1527 
1528 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1529     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1530     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1531   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1532   return emitParallelOrTeamsOutlinedFunction(
1533       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1534 }
1535 
1536 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1537     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1538     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1539   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1540   return emitParallelOrTeamsOutlinedFunction(
1541       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1542 }
1543 
/// Outlines the body of a task-generating directive ('task', 'taskloop' and
/// combined variants) into a separate function.
///
/// \param ThreadIDVar Variable holding the thread id; for tasks it is passed
///        by value (asserted below), unlike parallel regions.
/// \param PartIDVar Variable for the task part id, used by the untied-task
///        action.
/// \param TaskTVar Variable holding a pointer to the kmp_task_t object.
/// \param Tied True for tied tasks; untied tasks additionally report the
///        number of generated parts through \p NumberOfParts.
/// \param NumberOfParts [out] For untied tasks, set to the number of task
///        parts recorded by the untied-task action after codegen.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Callback run for untied tasks: emits a __kmpc_omp_task call with the
  // current kmp_task_t pointer (presumably to re-schedule the remainder of
  // the task between its parts).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  // The action must be attached to CodeGen before the captured statement is
  // emitted below.
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop-based directives capture their body under OMPD_taskloop, all
  // other task-generating directives under OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only these task directive kinds can carry a 'cancel' construct.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The number of parts is only known after codegen has run the action.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1588 
1589 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1590                              const RecordDecl *RD, const CGRecordLayout &RL,
1591                              ArrayRef<llvm::Constant *> Data) {
1592   llvm::StructType *StructTy = RL.getLLVMType();
1593   unsigned PrevIdx = 0;
1594   ConstantInitBuilder CIBuilder(CGM);
1595   auto DI = Data.begin();
1596   for (const FieldDecl *FD : RD->fields()) {
1597     unsigned Idx = RL.getLLVMFieldNo(FD);
1598     // Fill the alignment.
1599     for (unsigned I = PrevIdx; I < Idx; ++I)
1600       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1601     PrevIdx = Idx + 1;
1602     Fields.add(*DI);
1603     ++DI;
1604   }
1605 }
1606 
1607 template <class... As>
1608 static llvm::GlobalVariable *
1609 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1610                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1611                    As &&... Args) {
1612   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1613   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1614   ConstantInitBuilder CIBuilder(CGM);
1615   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1616   buildStructValue(Fields, CGM, RD, RL, Data);
1617   return Fields.finishAndCreateGlobal(
1618       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1619       std::forward<As>(Args)...);
1620 }
1621 
1622 template <typename T>
1623 static void
1624 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1625                                          ArrayRef<llvm::Constant *> Data,
1626                                          T &Parent) {
1627   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1628   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1629   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1630   buildStructValue(Fields, CGM, RD, RL, Data);
1631   Fields.finishAndAddTo(Parent);
1632 }
1633 
1634 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1635   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1636   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1637   FlagsTy FlagsKey(Flags, Reserved2Flags);
1638   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1639   if (!Entry) {
1640     if (!DefaultOpenMPPSource) {
1641       // Initialize default location for psource field of ident_t structure of
1642       // all ident_t objects. Format is ";file;function;line;column;;".
1643       // Taken from
1644       // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1645       DefaultOpenMPPSource =
1646           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1647       DefaultOpenMPPSource =
1648           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1649     }
1650 
1651     llvm::Constant *Data[] = {
1652         llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1653         llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1654         llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1655         llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1656     llvm::GlobalValue *DefaultOpenMPLocation =
1657         createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1658                            llvm::GlobalValue::PrivateLinkage);
1659     DefaultOpenMPLocation->setUnnamedAddr(
1660         llvm::GlobalValue::UnnamedAddr::Global);
1661 
1662     OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1663   }
1664   return Address(Entry, Align);
1665 }
1666 
1667 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1668                                              bool AtCurrentPoint) {
1669   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1670   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1671 
1672   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1673   if (AtCurrentPoint) {
1674     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1675         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1676   } else {
1677     Elem.second.ServiceInsertPt =
1678         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1679     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1680   }
1681 }
1682 
1683 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1684   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1685   if (Elem.second.ServiceInsertPt) {
1686     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1687     Elem.second.ServiceInsertPt = nullptr;
1688     Ptr->eraseFromParent();
1689   }
1690 }
1691 
/// Returns a pointer to an ident_t object describing \p Loc, for passing to
/// OpenMP runtime calls. Without debug info, or for an invalid location, a
/// shared default ident_t global is returned; otherwise a function-local
/// ident_t copy is (lazily) created and its psource field is updated with a
/// ";file;function;line;column;;" string built from \p Loc.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  // Reuse this function's cached ident_t alloca if one was created earlier.
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the local copy from the default ident_t at the function's
    // service insert point, so it happens once near the entry.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // The ";file;function;line;column;;" string is cached per source location.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1752 
/// Gets the OpenMP thread id for the current function, either by loading the
/// thread id variable of an outlined region or by emitting a call to
/// __kmpc_global_thread_num, caching the value per function where possible.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only load from the parameter when no landing pad can intervene:
      // either C++ exceptions are off / no landing pad is required, or the
      // pointer is available in the entry block or the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  // Emit the runtime call at the service insert point near the function entry
  // so the cached result is available to all later uses.
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1809 
1810 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1811   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1812   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1813     clearLocThreadIdInsertPt(CGF);
1814     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1815   }
1816   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1817     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1818       UDRMap.erase(D);
1819     FunctionUDRMap.erase(CGF.CurFn);
1820   }
1821   auto I = FunctionUDMMap.find(CGF.CurFn);
1822   if (I != FunctionUDMMap.end()) {
1823     for(const auto *D : I->second)
1824       UDMMap.erase(D);
1825     FunctionUDMMap.erase(I);
1826   }
1827   LastprivateConditionalToTypes.erase(CGF.CurFn);
1828 }
1829 
1830 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1831   return IdentTy->getPointerTo();
1832 }
1833 
1834 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1835   if (!Kmpc_MicroTy) {
1836     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1837     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1838                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1839     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1840   }
1841   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1842 }
1843 
1844 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1845   llvm::FunctionCallee RTLFn = nullptr;
1846   switch (static_cast<OpenMPRTLFunction>(Function)) {
1847   case OMPRTL__kmpc_fork_call: {
1848     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1849     // microtask, ...);
1850     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1851                                 getKmpc_MicroPointerTy()};
1852     auto *FnTy =
1853         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1854     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1855     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1856       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1857         llvm::LLVMContext &Ctx = F->getContext();
1858         llvm::MDBuilder MDB(Ctx);
1859         // Annotate the callback behavior of the __kmpc_fork_call:
1860         //  - The callback callee is argument number 2 (microtask).
1861         //  - The first two arguments of the callback callee are unknown (-1).
1862         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1863         //    callback callee.
1864         F->addMetadata(
1865             llvm::LLVMContext::MD_callback,
1866             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1867                                         2, {-1, -1},
1868                                         /* VarArgsArePassed */ true)}));
1869       }
1870     }
1871     break;
1872   }
1873   case OMPRTL__kmpc_global_thread_num: {
1874     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1875     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1876     auto *FnTy =
1877         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1878     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1879     break;
1880   }
1881   case OMPRTL__kmpc_threadprivate_cached: {
1882     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1883     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1884     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1885                                 CGM.VoidPtrTy, CGM.SizeTy,
1886                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1887     auto *FnTy =
1888         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1889     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1890     break;
1891   }
1892   case OMPRTL__kmpc_critical: {
1893     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1894     // kmp_critical_name *crit);
1895     llvm::Type *TypeParams[] = {
1896         getIdentTyPointerTy(), CGM.Int32Ty,
1897         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1898     auto *FnTy =
1899         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1900     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1901     break;
1902   }
1903   case OMPRTL__kmpc_critical_with_hint: {
1904     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1905     // kmp_critical_name *crit, uintptr_t hint);
1906     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1907                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1908                                 CGM.IntPtrTy};
1909     auto *FnTy =
1910         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1911     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1912     break;
1913   }
1914   case OMPRTL__kmpc_threadprivate_register: {
1915     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1916     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1917     // typedef void *(*kmpc_ctor)(void *);
1918     auto *KmpcCtorTy =
1919         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1920                                 /*isVarArg*/ false)->getPointerTo();
1921     // typedef void *(*kmpc_cctor)(void *, void *);
1922     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1923     auto *KmpcCopyCtorTy =
1924         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1925                                 /*isVarArg*/ false)
1926             ->getPointerTo();
1927     // typedef void (*kmpc_dtor)(void *);
1928     auto *KmpcDtorTy =
1929         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1930             ->getPointerTo();
1931     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1932                               KmpcCopyCtorTy, KmpcDtorTy};
1933     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1934                                         /*isVarArg*/ false);
1935     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1936     break;
1937   }
1938   case OMPRTL__kmpc_end_critical: {
1939     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1940     // kmp_critical_name *crit);
1941     llvm::Type *TypeParams[] = {
1942         getIdentTyPointerTy(), CGM.Int32Ty,
1943         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1944     auto *FnTy =
1945         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1946     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1947     break;
1948   }
1949   case OMPRTL__kmpc_cancel_barrier: {
1950     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1951     // global_tid);
1952     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1953     auto *FnTy =
1954         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1955     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1956     break;
1957   }
1958   case OMPRTL__kmpc_barrier: {
1959     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1960     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1961     auto *FnTy =
1962         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1963     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1964     break;
1965   }
1966   case OMPRTL__kmpc_for_static_fini: {
1967     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1968     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1969     auto *FnTy =
1970         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1971     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1972     break;
1973   }
1974   case OMPRTL__kmpc_push_num_threads: {
1975     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1976     // kmp_int32 num_threads)
1977     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1978                                 CGM.Int32Ty};
1979     auto *FnTy =
1980         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1981     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1982     break;
1983   }
1984   case OMPRTL__kmpc_serialized_parallel: {
1985     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1986     // global_tid);
1987     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1988     auto *FnTy =
1989         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1990     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1991     break;
1992   }
1993   case OMPRTL__kmpc_end_serialized_parallel: {
1994     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1995     // global_tid);
1996     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1997     auto *FnTy =
1998         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1999     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
2000     break;
2001   }
2002   case OMPRTL__kmpc_flush: {
2003     // Build void __kmpc_flush(ident_t *loc);
2004     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
2005     auto *FnTy =
2006         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2007     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
2008     break;
2009   }
2010   case OMPRTL__kmpc_master: {
2011     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
2012     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2013     auto *FnTy =
2014         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2015     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
2016     break;
2017   }
2018   case OMPRTL__kmpc_end_master: {
2019     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
2020     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2021     auto *FnTy =
2022         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2023     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
2024     break;
2025   }
2026   case OMPRTL__kmpc_omp_taskyield: {
2027     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
2028     // int end_part);
2029     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2030     auto *FnTy =
2031         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2032     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
2033     break;
2034   }
2035   case OMPRTL__kmpc_single: {
2036     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
2037     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2038     auto *FnTy =
2039         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2040     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
2041     break;
2042   }
2043   case OMPRTL__kmpc_end_single: {
2044     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
2045     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2046     auto *FnTy =
2047         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2048     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
2049     break;
2050   }
2051   case OMPRTL__kmpc_omp_task_alloc: {
2052     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2053     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2054     // kmp_routine_entry_t *task_entry);
2055     assert(KmpRoutineEntryPtrTy != nullptr &&
2056            "Type kmp_routine_entry_t must be created.");
2057     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2058                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
2059     // Return void * and then cast to particular kmp_task_t type.
2060     auto *FnTy =
2061         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2062     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
2063     break;
2064   }
2065   case OMPRTL__kmpc_omp_target_task_alloc: {
2066     // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
2067     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2068     // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
2069     assert(KmpRoutineEntryPtrTy != nullptr &&
2070            "Type kmp_routine_entry_t must be created.");
2071     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2072                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
2073                                 CGM.Int64Ty};
2074     // Return void * and then cast to particular kmp_task_t type.
2075     auto *FnTy =
2076         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2077     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
2078     break;
2079   }
2080   case OMPRTL__kmpc_omp_task: {
2081     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2082     // *new_task);
2083     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2084                                 CGM.VoidPtrTy};
2085     auto *FnTy =
2086         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2087     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
2088     break;
2089   }
2090   case OMPRTL__kmpc_copyprivate: {
2091     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
2092     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
2093     // kmp_int32 didit);
2094     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2095     auto *CpyFnTy =
2096         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
2097     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
2098                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
2099                                 CGM.Int32Ty};
2100     auto *FnTy =
2101         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2102     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
2103     break;
2104   }
2105   case OMPRTL__kmpc_reduce: {
2106     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
2107     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
2108     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
2109     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2110     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2111                                                /*isVarArg=*/false);
2112     llvm::Type *TypeParams[] = {
2113         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2114         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2115         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2116     auto *FnTy =
2117         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2118     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
2119     break;
2120   }
2121   case OMPRTL__kmpc_reduce_nowait: {
2122     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
2123     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2124     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
2125     // *lck);
2126     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2127     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2128                                                /*isVarArg=*/false);
2129     llvm::Type *TypeParams[] = {
2130         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2131         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2132         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2133     auto *FnTy =
2134         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2135     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
2136     break;
2137   }
2138   case OMPRTL__kmpc_end_reduce: {
2139     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2140     // kmp_critical_name *lck);
2141     llvm::Type *TypeParams[] = {
2142         getIdentTyPointerTy(), CGM.Int32Ty,
2143         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2144     auto *FnTy =
2145         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2146     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2147     break;
2148   }
2149   case OMPRTL__kmpc_end_reduce_nowait: {
2150     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
2151     // kmp_critical_name *lck);
2152     llvm::Type *TypeParams[] = {
2153         getIdentTyPointerTy(), CGM.Int32Ty,
2154         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2155     auto *FnTy =
2156         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2157     RTLFn =
2158         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2159     break;
2160   }
2161   case OMPRTL__kmpc_omp_task_begin_if0: {
2162     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2163     // *new_task);
2164     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2165                                 CGM.VoidPtrTy};
2166     auto *FnTy =
2167         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2168     RTLFn =
2169         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2170     break;
2171   }
2172   case OMPRTL__kmpc_omp_task_complete_if0: {
2173     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2174     // *new_task);
2175     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2176                                 CGM.VoidPtrTy};
2177     auto *FnTy =
2178         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2179     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2180                                       /*Name=*/"__kmpc_omp_task_complete_if0");
2181     break;
2182   }
2183   case OMPRTL__kmpc_ordered: {
2184     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2185     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2186     auto *FnTy =
2187         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2188     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2189     break;
2190   }
2191   case OMPRTL__kmpc_end_ordered: {
2192     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2193     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2194     auto *FnTy =
2195         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2196     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2197     break;
2198   }
2199   case OMPRTL__kmpc_omp_taskwait: {
2200     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2201     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2202     auto *FnTy =
2203         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2204     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2205     break;
2206   }
2207   case OMPRTL__kmpc_taskgroup: {
2208     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2209     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2210     auto *FnTy =
2211         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2212     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2213     break;
2214   }
2215   case OMPRTL__kmpc_end_taskgroup: {
2216     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2217     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2218     auto *FnTy =
2219         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2220     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2221     break;
2222   }
2223   case OMPRTL__kmpc_push_proc_bind: {
2224     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2225     // int proc_bind)
2226     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2227     auto *FnTy =
2228         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2229     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2230     break;
2231   }
2232   case OMPRTL__kmpc_omp_task_with_deps: {
2233     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2234     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2235     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2236     llvm::Type *TypeParams[] = {
2237         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2238         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2239     auto *FnTy =
2240         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2241     RTLFn =
2242         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2243     break;
2244   }
2245   case OMPRTL__kmpc_omp_wait_deps: {
2246     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2247     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2248     // kmp_depend_info_t *noalias_dep_list);
2249     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2250                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2251                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2252     auto *FnTy =
2253         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2254     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2255     break;
2256   }
2257   case OMPRTL__kmpc_cancellationpoint: {
2258     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2259     // global_tid, kmp_int32 cncl_kind)
2260     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2261     auto *FnTy =
2262         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2263     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2264     break;
2265   }
2266   case OMPRTL__kmpc_cancel: {
2267     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2268     // kmp_int32 cncl_kind)
2269     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2270     auto *FnTy =
2271         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2272     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2273     break;
2274   }
2275   case OMPRTL__kmpc_push_num_teams: {
2276     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2277     // kmp_int32 num_teams, kmp_int32 num_threads)
2278     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2279         CGM.Int32Ty};
2280     auto *FnTy =
2281         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2282     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2283     break;
2284   }
2285   case OMPRTL__kmpc_fork_teams: {
2286     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2287     // microtask, ...);
2288     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2289                                 getKmpc_MicroPointerTy()};
2290     auto *FnTy =
2291         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2292     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2293     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2294       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2295         llvm::LLVMContext &Ctx = F->getContext();
2296         llvm::MDBuilder MDB(Ctx);
2297         // Annotate the callback behavior of the __kmpc_fork_teams:
2298         //  - The callback callee is argument number 2 (microtask).
2299         //  - The first two arguments of the callback callee are unknown (-1).
2300         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2301         //    callback callee.
2302         F->addMetadata(
2303             llvm::LLVMContext::MD_callback,
2304             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2305                                         2, {-1, -1},
2306                                         /* VarArgsArePassed */ true)}));
2307       }
2308     }
2309     break;
2310   }
2311   case OMPRTL__kmpc_taskloop: {
2312     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2313     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2314     // sched, kmp_uint64 grainsize, void *task_dup);
2315     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2316                                 CGM.IntTy,
2317                                 CGM.VoidPtrTy,
2318                                 CGM.IntTy,
2319                                 CGM.Int64Ty->getPointerTo(),
2320                                 CGM.Int64Ty->getPointerTo(),
2321                                 CGM.Int64Ty,
2322                                 CGM.IntTy,
2323                                 CGM.IntTy,
2324                                 CGM.Int64Ty,
2325                                 CGM.VoidPtrTy};
2326     auto *FnTy =
2327         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2328     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2329     break;
2330   }
2331   case OMPRTL__kmpc_doacross_init: {
2332     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2333     // num_dims, struct kmp_dim *dims);
2334     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2335                                 CGM.Int32Ty,
2336                                 CGM.Int32Ty,
2337                                 CGM.VoidPtrTy};
2338     auto *FnTy =
2339         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2340     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2341     break;
2342   }
2343   case OMPRTL__kmpc_doacross_fini: {
2344     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2345     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2346     auto *FnTy =
2347         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2348     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2349     break;
2350   }
2351   case OMPRTL__kmpc_doacross_post: {
2352     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2353     // *vec);
2354     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2355                                 CGM.Int64Ty->getPointerTo()};
2356     auto *FnTy =
2357         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2358     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2359     break;
2360   }
2361   case OMPRTL__kmpc_doacross_wait: {
2362     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2363     // *vec);
2364     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2365                                 CGM.Int64Ty->getPointerTo()};
2366     auto *FnTy =
2367         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2368     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2369     break;
2370   }
2371   case OMPRTL__kmpc_task_reduction_init: {
2372     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2373     // *data);
2374     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2375     auto *FnTy =
2376         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2377     RTLFn =
2378         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2379     break;
2380   }
2381   case OMPRTL__kmpc_task_reduction_get_th_data: {
2382     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2383     // *d);
2384     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2385     auto *FnTy =
2386         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2387     RTLFn = CGM.CreateRuntimeFunction(
2388         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2389     break;
2390   }
2391   case OMPRTL__kmpc_alloc: {
2392     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2393     // al); omp_allocator_handle_t type is void *.
2394     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2395     auto *FnTy =
2396         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2397     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2398     break;
2399   }
2400   case OMPRTL__kmpc_free: {
2401     // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2402     // al); omp_allocator_handle_t type is void *.
2403     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2404     auto *FnTy =
2405         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2406     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2407     break;
2408   }
2409   case OMPRTL__kmpc_push_target_tripcount: {
2410     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2411     // size);
2412     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2413     llvm::FunctionType *FnTy =
2414         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2415     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2416     break;
2417   }
2418   case OMPRTL__tgt_target: {
2419     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2420     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2421     // *arg_types);
2422     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2423                                 CGM.VoidPtrTy,
2424                                 CGM.Int32Ty,
2425                                 CGM.VoidPtrPtrTy,
2426                                 CGM.VoidPtrPtrTy,
2427                                 CGM.Int64Ty->getPointerTo(),
2428                                 CGM.Int64Ty->getPointerTo()};
2429     auto *FnTy =
2430         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2431     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2432     break;
2433   }
2434   case OMPRTL__tgt_target_nowait: {
2435     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2436     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2437     // int64_t *arg_types);
2438     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2439                                 CGM.VoidPtrTy,
2440                                 CGM.Int32Ty,
2441                                 CGM.VoidPtrPtrTy,
2442                                 CGM.VoidPtrPtrTy,
2443                                 CGM.Int64Ty->getPointerTo(),
2444                                 CGM.Int64Ty->getPointerTo()};
2445     auto *FnTy =
2446         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2447     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2448     break;
2449   }
2450   case OMPRTL__tgt_target_teams: {
2451     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2452     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2453     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2454     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2455                                 CGM.VoidPtrTy,
2456                                 CGM.Int32Ty,
2457                                 CGM.VoidPtrPtrTy,
2458                                 CGM.VoidPtrPtrTy,
2459                                 CGM.Int64Ty->getPointerTo(),
2460                                 CGM.Int64Ty->getPointerTo(),
2461                                 CGM.Int32Ty,
2462                                 CGM.Int32Ty};
2463     auto *FnTy =
2464         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2465     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2466     break;
2467   }
2468   case OMPRTL__tgt_target_teams_nowait: {
2469     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2470     // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2471     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2472     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2473                                 CGM.VoidPtrTy,
2474                                 CGM.Int32Ty,
2475                                 CGM.VoidPtrPtrTy,
2476                                 CGM.VoidPtrPtrTy,
2477                                 CGM.Int64Ty->getPointerTo(),
2478                                 CGM.Int64Ty->getPointerTo(),
2479                                 CGM.Int32Ty,
2480                                 CGM.Int32Ty};
2481     auto *FnTy =
2482         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2483     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2484     break;
2485   }
2486   case OMPRTL__tgt_register_requires: {
2487     // Build void __tgt_register_requires(int64_t flags);
2488     llvm::Type *TypeParams[] = {CGM.Int64Ty};
2489     auto *FnTy =
2490         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2491     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2492     break;
2493   }
2494   case OMPRTL__tgt_target_data_begin: {
2495     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2496     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2497     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2498                                 CGM.Int32Ty,
2499                                 CGM.VoidPtrPtrTy,
2500                                 CGM.VoidPtrPtrTy,
2501                                 CGM.Int64Ty->getPointerTo(),
2502                                 CGM.Int64Ty->getPointerTo()};
2503     auto *FnTy =
2504         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2505     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2506     break;
2507   }
2508   case OMPRTL__tgt_target_data_begin_nowait: {
2509     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2510     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2511     // *arg_types);
2512     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2513                                 CGM.Int32Ty,
2514                                 CGM.VoidPtrPtrTy,
2515                                 CGM.VoidPtrPtrTy,
2516                                 CGM.Int64Ty->getPointerTo(),
2517                                 CGM.Int64Ty->getPointerTo()};
2518     auto *FnTy =
2519         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2520     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2521     break;
2522   }
2523   case OMPRTL__tgt_target_data_end: {
2524     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2525     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2526     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2527                                 CGM.Int32Ty,
2528                                 CGM.VoidPtrPtrTy,
2529                                 CGM.VoidPtrPtrTy,
2530                                 CGM.Int64Ty->getPointerTo(),
2531                                 CGM.Int64Ty->getPointerTo()};
2532     auto *FnTy =
2533         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2534     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2535     break;
2536   }
2537   case OMPRTL__tgt_target_data_end_nowait: {
2538     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2539     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2540     // *arg_types);
2541     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2542                                 CGM.Int32Ty,
2543                                 CGM.VoidPtrPtrTy,
2544                                 CGM.VoidPtrPtrTy,
2545                                 CGM.Int64Ty->getPointerTo(),
2546                                 CGM.Int64Ty->getPointerTo()};
2547     auto *FnTy =
2548         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2549     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2550     break;
2551   }
2552   case OMPRTL__tgt_target_data_update: {
2553     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2554     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2555     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2556                                 CGM.Int32Ty,
2557                                 CGM.VoidPtrPtrTy,
2558                                 CGM.VoidPtrPtrTy,
2559                                 CGM.Int64Ty->getPointerTo(),
2560                                 CGM.Int64Ty->getPointerTo()};
2561     auto *FnTy =
2562         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2563     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2564     break;
2565   }
2566   case OMPRTL__tgt_target_data_update_nowait: {
2567     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2568     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2569     // *arg_types);
2570     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2571                                 CGM.Int32Ty,
2572                                 CGM.VoidPtrPtrTy,
2573                                 CGM.VoidPtrPtrTy,
2574                                 CGM.Int64Ty->getPointerTo(),
2575                                 CGM.Int64Ty->getPointerTo()};
2576     auto *FnTy =
2577         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2578     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2579     break;
2580   }
2581   case OMPRTL__tgt_mapper_num_components: {
2582     // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
2583     llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
2584     auto *FnTy =
2585         llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
2586     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
2587     break;
2588   }
2589   case OMPRTL__tgt_push_mapper_component: {
2590     // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
2591     // *base, void *begin, int64_t size, int64_t type);
2592     llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
2593                                 CGM.Int64Ty, CGM.Int64Ty};
2594     auto *FnTy =
2595         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2596     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
2597     break;
2598   }
2599   }
2600   assert(RTLFn && "Unable to find OpenMP runtime function");
2601   return RTLFn;
2602 }
2603 
2604 llvm::FunctionCallee
2605 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2606   assert((IVSize == 32 || IVSize == 64) &&
2607          "IV size is not compatible with the omp runtime");
2608   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2609                                             : "__kmpc_for_static_init_4u")
2610                                 : (IVSigned ? "__kmpc_for_static_init_8"
2611                                             : "__kmpc_for_static_init_8u");
2612   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2613   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2614   llvm::Type *TypeParams[] = {
2615     getIdentTyPointerTy(),                     // loc
2616     CGM.Int32Ty,                               // tid
2617     CGM.Int32Ty,                               // schedtype
2618     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2619     PtrTy,                                     // p_lower
2620     PtrTy,                                     // p_upper
2621     PtrTy,                                     // p_stride
2622     ITy,                                       // incr
2623     ITy                                        // chunk
2624   };
2625   auto *FnTy =
2626       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2627   return CGM.CreateRuntimeFunction(FnTy, Name);
2628 }
2629 
2630 llvm::FunctionCallee
2631 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2632   assert((IVSize == 32 || IVSize == 64) &&
2633          "IV size is not compatible with the omp runtime");
2634   StringRef Name =
2635       IVSize == 32
2636           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2637           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2638   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2639   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2640                                CGM.Int32Ty,           // tid
2641                                CGM.Int32Ty,           // schedtype
2642                                ITy,                   // lower
2643                                ITy,                   // upper
2644                                ITy,                   // stride
2645                                ITy                    // chunk
2646   };
2647   auto *FnTy =
2648       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2649   return CGM.CreateRuntimeFunction(FnTy, Name);
2650 }
2651 
2652 llvm::FunctionCallee
2653 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2654   assert((IVSize == 32 || IVSize == 64) &&
2655          "IV size is not compatible with the omp runtime");
2656   StringRef Name =
2657       IVSize == 32
2658           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2659           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2660   llvm::Type *TypeParams[] = {
2661       getIdentTyPointerTy(), // loc
2662       CGM.Int32Ty,           // tid
2663   };
2664   auto *FnTy =
2665       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2666   return CGM.CreateRuntimeFunction(FnTy, Name);
2667 }
2668 
2669 llvm::FunctionCallee
2670 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2671   assert((IVSize == 32 || IVSize == 64) &&
2672          "IV size is not compatible with the omp runtime");
2673   StringRef Name =
2674       IVSize == 32
2675           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2676           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2677   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2678   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2679   llvm::Type *TypeParams[] = {
2680     getIdentTyPointerTy(),                     // loc
2681     CGM.Int32Ty,                               // tid
2682     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2683     PtrTy,                                     // p_lower
2684     PtrTy,                                     // p_upper
2685     PtrTy                                      // p_stride
2686   };
2687   auto *FnTy =
2688       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2689   return CGM.CreateRuntimeFunction(FnTy, Name);
2690 }
2691 
2692 /// Obtain information that uniquely identifies a target entry. This
2693 /// consists of the file and device IDs as well as line number associated with
2694 /// the relevant entry source location.
2695 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2696                                      unsigned &DeviceID, unsigned &FileID,
2697                                      unsigned &LineNum) {
2698   SourceManager &SM = C.getSourceManager();
2699 
2700   // The loc should be always valid and have a file ID (the user cannot use
2701   // #pragma directives in macros)
2702 
2703   assert(Loc.isValid() && "Source location is expected to be always valid.");
2704 
2705   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2706   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2707 
2708   llvm::sys::fs::UniqueID ID;
2709   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2710     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2711         << PLoc.getFilename() << EC.message();
2712 
2713   DeviceID = ID.getDevice();
2714   FileID = ID.getFile();
2715   LineNum = PLoc.getLine();
2716 }
2717 
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // In simd-only mode no reference pointer is emitted.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // A reference pointer is emitted only for 'declare target link' variables,
  // or for 'declare target to' variables when unified shared memory is
  // required.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the pointer's name from the mangled variable name. For internal
    // symbols a hex file ID is appended to keep names from different
    // translation units distinct.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // First use: create the weak global that holds the pointer and register
      // the variable with the offloading machinery.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized with the variable's address;
      // on the device no initializer is set here.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
2756 
2757 llvm::Constant *
2758 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2759   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2760          !CGM.getContext().getTargetInfo().isTLSSupported());
2761   // Lookup the entry, lazily creating it if necessary.
2762   std::string Suffix = getName({"cache", ""});
2763   return getOrCreateInternalVariable(
2764       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2765 }
2766 
2767 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2768                                                 const VarDecl *VD,
2769                                                 Address VDAddr,
2770                                                 SourceLocation Loc) {
2771   if (CGM.getLangOpts().OpenMPUseTLS &&
2772       CGM.getContext().getTargetInfo().isTLSSupported())
2773     return VDAddr;
2774 
2775   llvm::Type *VarTy = VDAddr.getElementType();
2776   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2777                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2778                                                        CGM.Int8PtrTy),
2779                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2780                          getOrCreateThreadPrivateCache(VD)};
2781   return Address(CGF.EmitRuntimeCall(
2782       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2783                  VDAddr.getAlignment());
2784 }
2785 
2786 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2787     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2788     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2789   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2790   // library.
2791   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2792   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2793                       OMPLoc);
2794   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2795   // to register constructor/destructor for variable.
2796   llvm::Value *Args[] = {
2797       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2798       Ctor, CopyCtor, Dtor};
2799   CGF.EmitRuntimeCall(
2800       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2801 }
2802 
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With native TLS support there is nothing to register with the runtime.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit the ctor/dtor registration only once per mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // Ctor signature: void *ctor(void *dst).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer argument and emit the initializer into
      // the storage it points to.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the destination pointer.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // Dtor signature: void dtor(void *dst).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // Pass a typed null pointer when no constructor is needed.
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      // Pass a typed null pointer when no destructor is needed.
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No CodeGenFunction was provided: wrap the registration in a
      // standalone "__omp_threadprivate_init_" function and return it so the
      // caller can schedule it as a global initializer.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration inline into the given function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
2922 
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do without offloading targets (unless compiling for a device).
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Skip non-declare-target variables, 'link' variables, and 'to' variables
  // under unified shared memory; those are handled through the reference
  // pointer path instead.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the ctor/dtor entries only once per mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive so it survives optimization.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host, only a placeholder global is needed to serve as the
      // entry's ID/address.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive so it survives optimization.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder global serving as the entry's ID/address.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
3037 
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  // Creates (or reuses) a compiler-generated threadprivate global named from
  // Name plus an "artificial" suffix and returns the address of the current
  // thread's copy.
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // With native TLS, simply mark the global thread-local and return it.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise go through the runtime's cached threadprivate mechanism,
  // passing a dedicated per-variable cache global as the last argument.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // Cast the returned pointer back to a pointer to the variable's memory
  // type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
3066 
3067 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
3068                                    const RegionCodeGenTy &ThenGen,
3069                                    const RegionCodeGenTy &ElseGen) {
3070   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
3071 
3072   // If the condition constant folds and can be elided, try to avoid emitting
3073   // the condition and the dead arm of the if/else.
3074   bool CondConstant;
3075   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
3076     if (CondConstant)
3077       ThenGen(CGF);
3078     else
3079       ElseGen(CGF);
3080     return;
3081   }
3082 
3083   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
3084   // emit the conditional branch.
3085   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
3086   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
3087   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
3088   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
3089 
3090   // Emit the 'then' code.
3091   CGF.EmitBlock(ThenBlock);
3092   ThenGen(CGF);
3093   CGF.EmitBranch(ContBlock);
3094   // Emit the 'else' code if present.
3095   // There is no need to emit line number for unconditional branch.
3096   (void)ApplyDebugLocation::CreateEmpty(CGF);
3097   CGF.EmitBlock(ElseBlock);
3098   ElseGen(CGF);
3099   // There is no need to emit line number for unconditional branch.
3100   (void)ApplyDebugLocation::CreateEmpty(CGF);
3101   CGF.EmitBranch(ContBlock);
3102   // Emit the continuation block for code after the if.
3103   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
3104 }
3105 
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  // Emits the call to the outlined parallel region. With an if-clause the
  // region is guarded: the runtime fork runs on the true branch and a
  // serialized, single-thread execution on the false branch.
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  // True (or unconditional) branch: fork a team through the runtime.
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    // Captured variables are passed as trailing variadic arguments.
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // False branch: run the outlined function directly on this thread,
  // bracketed by serialization/deserialization runtime calls.
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  // With an if-clause select between the arms (constant conditions are
  // folded by emitIfClause); otherwise always fork.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
3165 
3166 // If we're inside an (outlined) parallel region, use the region info's
3167 // thread-ID variable (it is passed in a first argument of the outlined function
3168 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3169 // regular serial code region, get thread ID by calling kmp_int32
3170 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3171 // return the address of that temp.
3172 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3173                                              SourceLocation Loc) {
3174   if (auto *OMPRegionInfo =
3175           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3176     if (OMPRegionInfo->getThreadIDVariable())
3177       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
3178 
3179   llvm::Value *ThreadID = getThreadID(CGF, Loc);
3180   QualType Int32Ty =
3181       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3182   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3183   CGF.EmitStoreOfScalar(ThreadID,
3184                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3185 
3186   return ThreadIDTemp;
3187 }
3188 
3189 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3190     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3191   SmallString<256> Buffer;
3192   llvm::raw_svector_ostream Out(Buffer);
3193   Out << Name;
3194   StringRef RuntimeName = Out.str();
3195   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3196   if (Elem.second) {
3197     assert(Elem.second->getType()->getPointerElementType() == Ty &&
3198            "OMP internal variable has different type than requested");
3199     return &*Elem.second;
3200   }
3201 
3202   return Elem.second = new llvm::GlobalVariable(
3203              CGM.getModule(), Ty, /*IsConstant*/ false,
3204              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3205              Elem.first(), /*InsertBefore=*/nullptr,
3206              llvm::GlobalValue::NotThreadLocal, AddressSpace);
3207 }
3208 
3209 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3210   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3211   std::string Name = getName({Prefix, "var"});
3212   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3213 }
3214 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  /// Runtime entry point called on entry to the region.
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  /// Runtime entry point called on exit from the region.
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  /// When true, the region body is emitted under an 'if' guarded by the
  /// enter call's (non-zero) result.
  bool Conditional;
  /// Continuation block of the conditional region; set by Enter() only when
  /// Conditional is true.
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      // Only enter the region body when the runtime call returned non-zero.
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // NOTE(review): Done() relies on ContBlock having been set by Enter(),
  // i.e. it must only be used with Conditional actions.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
3253 
3254 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3255                                          StringRef CriticalName,
3256                                          const RegionCodeGenTy &CriticalOpGen,
3257                                          SourceLocation Loc, const Expr *Hint) {
3258   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3259   // CriticalOpGen();
3260   // __kmpc_end_critical(ident_t *, gtid, Lock);
3261   // Prepare arguments and build a call to __kmpc_critical
3262   if (!CGF.HaveInsertPoint())
3263     return;
3264   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3265                          getCriticalRegionLock(CriticalName)};
3266   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3267                                                 std::end(Args));
3268   if (Hint) {
3269     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3270         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3271   }
3272   CommonActionTy Action(
3273       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3274                                  : OMPRTL__kmpc_critical),
3275       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3276   CriticalOpGen.setAction(Action);
3277   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3278 }
3279 
3280 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3281                                        const RegionCodeGenTy &MasterOpGen,
3282                                        SourceLocation Loc) {
3283   if (!CGF.HaveInsertPoint())
3284     return;
3285   // if(__kmpc_master(ident_t *, gtid)) {
3286   //   MasterOpGen();
3287   //   __kmpc_end_master(ident_t *, gtid);
3288   // }
3289   // Prepare arguments and build a call to __kmpc_master
3290   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3291   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3292                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3293                         /*Conditional=*/true);
3294   MasterOpGen.setAction(Action);
3295   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3296   Action.Done(CGF);
3297 }
3298 
3299 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3300                                         SourceLocation Loc) {
3301   if (!CGF.HaveInsertPoint())
3302     return;
3303   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3304   if (OMPBuilder) {
3305     OMPBuilder->CreateTaskyield(CGF.Builder);
3306   } else {
3307     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3308     llvm::Value *Args[] = {
3309         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3310         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3311     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield),
3312                         Args);
3313   }
3314 
3315   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3316     Region->emitUntiedSwitch(CGF);
3317 }
3318 
3319 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3320                                           const RegionCodeGenTy &TaskgroupOpGen,
3321                                           SourceLocation Loc) {
3322   if (!CGF.HaveInsertPoint())
3323     return;
3324   // __kmpc_taskgroup(ident_t *, gtid);
3325   // TaskgroupOpGen();
3326   // __kmpc_end_taskgroup(ident_t *, gtid);
3327   // Prepare arguments and build a call to __kmpc_taskgroup
3328   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3329   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3330                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3331                         Args);
3332   TaskgroupOpGen.setAction(Action);
3333   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3334 }
3335 
3336 /// Given an array of pointers to variables, project the address of a
3337 /// given variable.
3338 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3339                                       unsigned Index, const VarDecl *Var) {
3340   // Pull out the pointer to the variable.
3341   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3342   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3343 
3344   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3345   Addr = CGF.Builder.CreateElementBitCast(
3346       Addr, CGF.ConvertTypeForMem(Var->getType()));
3347   return Addr;
3348 }
3349 
/// Emit the internal helper passed to __kmpc_copyprivate:
///   void copy_func(void *LHSArg, void *RHSArg);
/// Both arguments are pointers to arrays of void* with one slot per
/// copyprivate variable (the array type is \p ArgsType). The helper performs
/// Dst[i] = Src[i] for every variable using the AST-provided assignment
/// expressions in \p AssignmentOps, so user-defined copy assignment is
/// honored. Returns the emitted llvm::Function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // Internal linkage: the helper is only referenced by the copyprivate call
  // emitted in this module.
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Reinterpret the opaque void* parameters as pointers to the void*[n]
  // copyprivate lists:
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // Emit one assignment per copyprivate variable:
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The copy is driven by the AST assignment expression so that class types
    // use their copy-assignment operator.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
3403 
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four copyprivate arrays are parallel: element I of each describes the
  // same variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // Overall shape of the emitted code:
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // 'did_it' is only materialized when there are copyprivate variables to
  // broadcast; it records whether this thread executed the single region.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single.
  // Conditional=true: the region body only runs when __kmpc_single returns
  // nonzero, and __kmpc_end_single is emitted on that path.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the conditional single path)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    // Fill each slot with the address of the corresponding variable, cast to
    // void*.
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}
3485 
3486 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3487                                         const RegionCodeGenTy &OrderedOpGen,
3488                                         SourceLocation Loc, bool IsThreads) {
3489   if (!CGF.HaveInsertPoint())
3490     return;
3491   // __kmpc_ordered(ident_t *, gtid);
3492   // OrderedOpGen();
3493   // __kmpc_end_ordered(ident_t *, gtid);
3494   // Prepare arguments and build a call to __kmpc_ordered
3495   if (IsThreads) {
3496     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3497     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3498                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3499                           Args);
3500     OrderedOpGen.setAction(Action);
3501     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3502     return;
3503   }
3504   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3505 }
3506 
3507 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3508   unsigned Flags;
3509   if (Kind == OMPD_for)
3510     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3511   else if (Kind == OMPD_sections)
3512     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3513   else if (Kind == OMPD_single)
3514     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3515   else if (Kind == OMPD_barrier)
3516     Flags = OMP_IDENT_BARRIER_EXPL;
3517   else
3518     Flags = OMP_IDENT_BARRIER_IMPL;
3519   return Flags;
3520 }
3521 
3522 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3523     CodeGenFunction &CGF, const OMPLoopDirective &S,
3524     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3525   // Check if the loop directive is actually a doacross loop directive. In this
3526   // case choose static, 1 schedule.
3527   if (llvm::any_of(
3528           S.getClausesOfKind<OMPOrderedClause>(),
3529           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3530     ScheduleKind = OMPC_SCHEDULE_static;
3531     // Chunk size is 1 in this case.
3532     llvm::APInt ChunkSize(32, 1);
3533     ChunkExpr = IntegerLiteral::Create(
3534         CGF.getContext(), ChunkSize,
3535         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3536         SourceLocation());
3537   }
3538 }
3539 
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    // Delegate barrier emission entirely to the OpenMPIRBuilder.
    CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    // Inside a cancellable region, use the cancellation-aware barrier so a
    // cancelled team can exit the construct.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Default: plain (non-cancellable) barrier.
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}
3586 
3587 /// Map the OpenMP loop schedule to the runtime enumeration.
3588 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3589                                           bool Chunked, bool Ordered) {
3590   switch (ScheduleKind) {
3591   case OMPC_SCHEDULE_static:
3592     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3593                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3594   case OMPC_SCHEDULE_dynamic:
3595     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3596   case OMPC_SCHEDULE_guided:
3597     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3598   case OMPC_SCHEDULE_runtime:
3599     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3600   case OMPC_SCHEDULE_auto:
3601     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3602   case OMPC_SCHEDULE_unknown:
3603     assert(!Chunked && "chunk was specified but schedule kind not known");
3604     return Ordered ? OMP_ord_static : OMP_sch_static;
3605   }
3606   llvm_unreachable("Unexpected runtime schedule");
3607 }
3608 
3609 /// Map the OpenMP distribute schedule to the runtime enumeration.
3610 static OpenMPSchedType
3611 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3612   // only static is allowed for dist_schedule
3613   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3614 }
3615 
3616 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3617                                          bool Chunked) const {
3618   OpenMPSchedType Schedule =
3619       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3620   return Schedule == OMP_sch_static;
3621 }
3622 
3623 bool CGOpenMPRuntime::isStaticNonchunked(
3624     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3625   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3626   return Schedule == OMP_dist_sch_static;
3627 }
3628 
3629 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3630                                       bool Chunked) const {
3631   OpenMPSchedType Schedule =
3632       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3633   return Schedule == OMP_sch_static_chunked;
3634 }
3635 
3636 bool CGOpenMPRuntime::isStaticChunked(
3637     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3638   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3639   return Schedule == OMP_dist_sch_static_chunked;
3640 }
3641 
3642 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3643   OpenMPSchedType Schedule =
3644       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3645   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3646   return Schedule != OMP_sch_static;
3647 }
3648 
3649 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
3650                                   OpenMPScheduleClauseModifier M1,
3651                                   OpenMPScheduleClauseModifier M2) {
3652   int Modifier = 0;
3653   switch (M1) {
3654   case OMPC_SCHEDULE_MODIFIER_monotonic:
3655     Modifier = OMP_sch_modifier_monotonic;
3656     break;
3657   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3658     Modifier = OMP_sch_modifier_nonmonotonic;
3659     break;
3660   case OMPC_SCHEDULE_MODIFIER_simd:
3661     if (Schedule == OMP_sch_static_chunked)
3662       Schedule = OMP_sch_static_balanced_chunked;
3663     break;
3664   case OMPC_SCHEDULE_MODIFIER_last:
3665   case OMPC_SCHEDULE_MODIFIER_unknown:
3666     break;
3667   }
3668   switch (M2) {
3669   case OMPC_SCHEDULE_MODIFIER_monotonic:
3670     Modifier = OMP_sch_modifier_monotonic;
3671     break;
3672   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3673     Modifier = OMP_sch_modifier_nonmonotonic;
3674     break;
3675   case OMPC_SCHEDULE_MODIFIER_simd:
3676     if (Schedule == OMP_sch_static_chunked)
3677       Schedule = OMP_sch_static_balanced_chunked;
3678     break;
3679   case OMPC_SCHEDULE_MODIFIER_last:
3680   case OMPC_SCHEDULE_MODIFIER_unknown:
3681     break;
3682   }
3683   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
3684   // If the static schedule kind is specified or if the ordered clause is
3685   // specified, and if the nonmonotonic modifier is not specified, the effect is
3686   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
3687   // modifier is specified, the effect is as if the nonmonotonic modifier is
3688   // specified.
3689   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
3690     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
3691           Schedule == OMP_sch_static_balanced_chunked ||
3692           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
3693           Schedule == OMP_dist_sch_static_chunked ||
3694           Schedule == OMP_dist_sch_static))
3695       Modifier = OMP_sch_modifier_nonmonotonic;
3696   }
3697   return Schedule | Modifier;
3698 }
3699 
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules go through the __kmpc_for_static_init path instead;
  // only ordered loops may reach here with a static schedule.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
3732 
/// Shared helper for emitForStaticInit and emitDistributeStaticInit: marshals
/// the StaticRTInput values into the argument list of the (already selected)
/// __kmpc_for_static_init_* runtime entry point and emits the call.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Ordered loops are lowered via the dispatch API, never here.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
3781 
3782 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3783                                         SourceLocation Loc,
3784                                         OpenMPDirectiveKind DKind,
3785                                         const OpenMPScheduleTy &ScheduleKind,
3786                                         const StaticRTInput &Values) {
3787   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3788       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3789   assert(isOpenMPWorksharingDirective(DKind) &&
3790          "Expected loop-based or sections-based directive.");
3791   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3792                                              isOpenMPLoopDirective(DKind)
3793                                                  ? OMP_IDENT_WORK_LOOP
3794                                                  : OMP_IDENT_WORK_SECTIONS);
3795   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3796   llvm::FunctionCallee StaticInitFunction =
3797       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3798   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
3799   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3800                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3801 }
3802 
3803 void CGOpenMPRuntime::emitDistributeStaticInit(
3804     CodeGenFunction &CGF, SourceLocation Loc,
3805     OpenMPDistScheduleClauseKind SchedKind,
3806     const CGOpenMPRuntime::StaticRTInput &Values) {
3807   OpenMPSchedType ScheduleNum =
3808       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3809   llvm::Value *UpdatedLocation =
3810       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3811   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3812   llvm::FunctionCallee StaticInitFunction =
3813       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3814   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3815                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3816                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3817 }
3818 
3819 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3820                                           SourceLocation Loc,
3821                                           OpenMPDirectiveKind DKind) {
3822   if (!CGF.HaveInsertPoint())
3823     return;
3824   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3825   llvm::Value *Args[] = {
3826       emitUpdateLocation(CGF, Loc,
3827                          isOpenMPDistributeDirective(DKind)
3828                              ? OMP_IDENT_WORK_DISTRIBUTE
3829                              : isOpenMPLoopDirective(DKind)
3830                                    ? OMP_IDENT_WORK_LOOP
3831                                    : OMP_IDENT_WORK_SECTIONS),
3832       getThreadID(CGF, Loc)};
3833   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
3834   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3835                       Args);
3836 }
3837 
3838 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3839                                                  SourceLocation Loc,
3840                                                  unsigned IVSize,
3841                                                  bool IVSigned) {
3842   if (!CGF.HaveInsertPoint())
3843     return;
3844   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3845   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3846   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3847 }
3848 
3849 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3850                                           SourceLocation Loc, unsigned IVSize,
3851                                           bool IVSigned, Address IL,
3852                                           Address LB, Address UB,
3853                                           Address ST) {
3854   // Call __kmpc_dispatch_next(
3855   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3856   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3857   //          kmp_int[32|64] *p_stride);
3858   llvm::Value *Args[] = {
3859       emitUpdateLocation(CGF, Loc),
3860       getThreadID(CGF, Loc),
3861       IL.getPointer(), // &isLastIter
3862       LB.getPointer(), // &Lower
3863       UB.getPointer(), // &Upper
3864       ST.getPointer()  // &Stride
3865   };
3866   llvm::Value *Call =
3867       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3868   return CGF.EmitScalarConversion(
3869       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3870       CGF.getContext().BoolTy, Loc);
3871 }
3872 
3873 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3874                                            llvm::Value *NumThreads,
3875                                            SourceLocation Loc) {
3876   if (!CGF.HaveInsertPoint())
3877     return;
3878   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3879   llvm::Value *Args[] = {
3880       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3881       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3882   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3883                       Args);
3884 }
3885 
3886 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3887                                          ProcBindKind ProcBind,
3888                                          SourceLocation Loc) {
3889   if (!CGF.HaveInsertPoint())
3890     return;
3891   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
3892   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3893   llvm::Value *Args[] = {
3894       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3895       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
3896   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3897 }
3898 
3899 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3900                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
3901   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3902   if (OMPBuilder) {
3903     OMPBuilder->CreateFlush(CGF.Builder);
3904   } else {
3905     if (!CGF.HaveInsertPoint())
3906       return;
3907     // Build call void __kmpc_flush(ident_t *loc)
3908     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3909                         emitUpdateLocation(CGF, Loc));
3910   }
3911 }
3912 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): the order of these enumerators is used as field indices into
/// the kmp_task_t record built by this file's task codegen — do not reorder
/// without updating that record layout.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3938 
3939 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3940   return OffloadEntriesTargetRegion.empty() &&
3941          OffloadEntriesDeviceGlobalVar.empty();
3942 }
3943 
/// Initialize target region entry.
/// Creates a placeholder entry (null address/ID) for the target region
/// identified by (DeviceID, FileID, ParentName, LineNum) at position
/// \p Order, and bumps the global entry counter.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // The real address and ID are filled in later by
  // registerTargetRegionEntryInfo.
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}
3957 
3958 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3959     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3960                                   StringRef ParentName, unsigned LineNum,
3961                                   llvm::Constant *Addr, llvm::Constant *ID,
3962                                   OMPTargetRegionEntryKind Flags) {
3963   // If we are emitting code for a target, the entry is already initialized,
3964   // only has to be registered.
3965   if (CGM.getLangOpts().OpenMPIsDevice) {
3966     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3967       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3968           DiagnosticsEngine::Error,
3969           "Unable to find target region on line '%0' in the device code.");
3970       CGM.getDiags().Report(DiagID) << LineNum;
3971       return;
3972     }
3973     auto &Entry =
3974         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3975     assert(Entry.isValid() && "Entry not initialized!");
3976     Entry.setAddress(Addr);
3977     Entry.setID(ID);
3978     Entry.setFlags(Flags);
3979   } else {
3980     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3981     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3982     ++OffloadingEntriesNum;
3983   }
3984 }
3985 
3986 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3987     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3988     unsigned LineNum) const {
3989   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3990   if (PerDevice == OffloadEntriesTargetRegion.end())
3991     return false;
3992   auto PerFile = PerDevice->second.find(FileID);
3993   if (PerFile == PerDevice->second.end())
3994     return false;
3995   auto PerParentName = PerFile->second.find(ParentName);
3996   if (PerParentName == PerFile->second.end())
3997     return false;
3998   auto PerLine = PerParentName->second.find(LineNum);
3999   if (PerLine == PerParentName->second.end())
4000     return false;
4001   // Fail if this entry is already registered.
4002   if (PerLine->second.getAddress() || PerLine->second.getID())
4003     return false;
4004   return true;
4005 }
4006 
4007 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
4008     const OffloadTargetRegionEntryInfoActTy &Action) {
4009   // Scan all target region entries and perform the provided action.
4010   for (const auto &D : OffloadEntriesTargetRegion)
4011     for (const auto &F : D.second)
4012       for (const auto &P : F.second)
4013         for (const auto &L : P.second)
4014           Action(D.first, F.first, P.first(), L.first, L.second);
4015 }
4016 
4017 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4018     initializeDeviceGlobalVarEntryInfo(StringRef Name,
4019                                        OMPTargetGlobalVarEntryKind Flags,
4020                                        unsigned Order) {
4021   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
4022                                              "only required for the device "
4023                                              "code generation.");
4024   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
4025   ++OffloadingEntriesNum;
4026 }
4027 
4028 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4029     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
4030                                      CharUnits VarSize,
4031                                      OMPTargetGlobalVarEntryKind Flags,
4032                                      llvm::GlobalValue::LinkageTypes Linkage) {
4033   if (CGM.getLangOpts().OpenMPIsDevice) {
4034     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
4035     assert(Entry.isValid() && Entry.getFlags() == Flags &&
4036            "Entry not initialized!");
4037     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
4038            "Resetting with the new address.");
4039     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
4040       if (Entry.getVarSize().isZero()) {
4041         Entry.setVarSize(VarSize);
4042         Entry.setLinkage(Linkage);
4043       }
4044       return;
4045     }
4046     Entry.setVarSize(VarSize);
4047     Entry.setLinkage(Linkage);
4048     Entry.setAddress(Addr);
4049   } else {
4050     if (hasDeviceGlobalVarEntryInfo(VarName)) {
4051       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
4052       assert(Entry.isValid() && Entry.getFlags() == Flags &&
4053              "Entry not initialized!");
4054       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
4055              "Resetting with the new address.");
4056       if (Entry.getVarSize().isZero()) {
4057         Entry.setVarSize(VarSize);
4058         Entry.setLinkage(Linkage);
4059       }
4060       return;
4061     }
4062     OffloadEntriesDeviceGlobalVar.try_emplace(
4063         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
4064     ++OffloadingEntriesNum;
4065   }
4066 }
4067 
4068 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4069     actOnDeviceGlobalVarEntriesInfo(
4070         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
4071   // Scan all target region entries and perform the provided action.
4072   for (const auto &E : OffloadEntriesDeviceGlobalVar)
4073     Action(E.getKey(), E.getValue());
4074 }
4075 
4076 void CGOpenMPRuntime::createOffloadEntry(
4077     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4078     llvm::GlobalValue::LinkageTypes Linkage) {
4079   StringRef Name = Addr->getName();
4080   llvm::Module &M = CGM.getModule();
4081   llvm::LLVMContext &C = M.getContext();
4082 
4083   // Create constant string with the name.
4084   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4085 
4086   std::string StringName = getName({"omp_offloading", "entry_name"});
4087   auto *Str = new llvm::GlobalVariable(
4088       M, StrPtrInit->getType(), /*isConstant=*/true,
4089       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4090   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4091 
4092   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4093                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4094                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4095                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4096                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4097   std::string EntryName = getName({"omp_offloading", "entry", ""});
4098   llvm::GlobalVariable *Entry = createGlobalStruct(
4099       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4100       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4101 
4102   // The entry has to be created in the section the linker expects it to be.
4103   Entry->setSection("omp_offloading_entries");
4104 }
4105 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Both vectors are indexed by an entry's creation order (E.getOrder()) and
  // are filled in by the two emitter lambdas below before the final loop
  // processes them in that order.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a SourceLocation for diagnostics by matching the (device,
        // file) unique ID back to a FileEntry known to the SourceManager; the
        // location stays invalid if no such file is found.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Walk the entries in creation order, validating each one and emitting the
  // corresponding __tgt_offload_entry descriptor.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // Under unified shared memory the device accesses the host copy, so
        // no device-side entry is needed.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // 'link' entries get an address on the host side only; the device
        // resolves them through the mapping table at runtime.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
4279 
4280 /// Loads all the offload entries information from the host IR
4281 /// metadata.
4282 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4283   // If we are in target mode, load the metadata from the host IR. This code has
4284   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
4285 
4286   if (!CGM.getLangOpts().OpenMPIsDevice)
4287     return;
4288 
4289   if (CGM.getLangOpts().OMPHostIRFile.empty())
4290     return;
4291 
4292   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4293   if (auto EC = Buf.getError()) {
4294     CGM.getDiags().Report(diag::err_cannot_open_file)
4295         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4296     return;
4297   }
4298 
4299   llvm::LLVMContext C;
4300   auto ME = expectedToErrorOrAndEmitErrors(
4301       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4302 
4303   if (auto EC = ME.getError()) {
4304     unsigned DiagID = CGM.getDiags().getCustomDiagID(
4305         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4306     CGM.getDiags().Report(DiagID)
4307         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4308     return;
4309   }
4310 
4311   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4312   if (!MD)
4313     return;
4314 
4315   for (llvm::MDNode *MN : MD->operands()) {
4316     auto &&GetMDInt = [MN](unsigned Idx) {
4317       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4318       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4319     };
4320 
4321     auto &&GetMDString = [MN](unsigned Idx) {
4322       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4323       return V->getString();
4324     };
4325 
4326     switch (GetMDInt(0)) {
4327     default:
4328       llvm_unreachable("Unexpected metadata!");
4329       break;
4330     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4331         OffloadingEntryInfoTargetRegion:
4332       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4333           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4334           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4335           /*Order=*/GetMDInt(5));
4336       break;
4337     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4338         OffloadingEntryInfoDeviceGlobalVar:
4339       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4340           /*MangledName=*/GetMDString(1),
4341           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4342               /*Flags=*/GetMDInt(2)),
4343           /*Order=*/GetMDInt(3));
4344       break;
4345     }
4346   }
4347 }
4348 
4349 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4350   if (!KmpRoutineEntryPtrTy) {
4351     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4352     ASTContext &C = CGM.getContext();
4353     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4354     FunctionProtoType::ExtProtoInfo EPI;
4355     KmpRoutineEntryPtrQTy = C.getPointerType(
4356         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4357     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4358   }
4359 }
4360 
4361 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4362   // Make sure the type of the entry is already created. This is the type we
4363   // have to create:
4364   // struct __tgt_offload_entry{
4365   //   void      *addr;       // Pointer to the offload entry info.
4366   //                          // (function or global)
4367   //   char      *name;       // Name of the function or global.
4368   //   size_t     size;       // Size of the entry info (0 if it a function).
4369   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4370   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4371   // };
4372   if (TgtOffloadEntryQTy.isNull()) {
4373     ASTContext &C = CGM.getContext();
4374     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4375     RD->startDefinition();
4376     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4377     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4378     addFieldToRecordDecl(C, RD, C.getSizeType());
4379     addFieldToRecordDecl(
4380         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4381     addFieldToRecordDecl(
4382         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4383     RD->completeDefinition();
4384     RD->addAttr(PackedAttr::CreateImplicit(C));
4385     TgtOffloadEntryQTy = C.getRecordType(RD);
4386   }
4387   return TgtOffloadEntryQTy;
4388 }
4389 
4390 namespace {
4391 struct PrivateHelpersTy {
4392   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
4393                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
4394       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
4395         PrivateElemInit(PrivateElemInit) {}
4396   const Expr *OriginalRef = nullptr;
4397   const VarDecl *Original = nullptr;
4398   const VarDecl *PrivateCopy = nullptr;
4399   const VarDecl *PrivateElemInit = nullptr;
4400 };
4401 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4402 } // anonymous namespace
4403 
4404 static RecordDecl *
4405 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4406   if (!Privates.empty()) {
4407     ASTContext &C = CGM.getContext();
4408     // Build struct .kmp_privates_t. {
4409     //         /*  private vars  */
4410     //       };
4411     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4412     RD->startDefinition();
4413     for (const auto &Pair : Privates) {
4414       const VarDecl *VD = Pair.second.Original;
4415       QualType Type = VD->getType().getNonReferenceType();
4416       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4417       if (VD->hasAttrs()) {
4418         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4419              E(VD->getAttrs().end());
4420              I != E; ++I)
4421           FD->addAttr(*I);
4422       }
4423     }
4424     RD->completeDefinition();
4425     return RD;
4426   }
4427   return nullptr;
4428 }
4429 
4430 static RecordDecl *
4431 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4432                          QualType KmpInt32Ty,
4433                          QualType KmpRoutineEntryPointerQTy) {
4434   ASTContext &C = CGM.getContext();
4435   // Build struct kmp_task_t {
4436   //         void *              shareds;
4437   //         kmp_routine_entry_t routine;
4438   //         kmp_int32           part_id;
4439   //         kmp_cmplrdata_t data1;
4440   //         kmp_cmplrdata_t data2;
4441   // For taskloops additional fields:
4442   //         kmp_uint64          lb;
4443   //         kmp_uint64          ub;
4444   //         kmp_int64           st;
4445   //         kmp_int32           liter;
4446   //         void *              reductions;
4447   //       };
4448   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4449   UD->startDefinition();
4450   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4451   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4452   UD->completeDefinition();
4453   QualType KmpCmplrdataTy = C.getRecordType(UD);
4454   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4455   RD->startDefinition();
4456   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4457   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4458   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4459   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4460   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4461   if (isOpenMPTaskLoopDirective(Kind)) {
4462     QualType KmpUInt64Ty =
4463         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4464     QualType KmpInt64Ty =
4465         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4466     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4467     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4468     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4469     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4470     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4471   }
4472   RD->completeDefinition();
4473   return RD;
4474 }
4475 
4476 static RecordDecl *
4477 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4478                                      ArrayRef<PrivateDataTy> Privates) {
4479   ASTContext &C = CGM.getContext();
4480   // Build struct kmp_task_t_with_privates {
4481   //         kmp_task_t task_data;
4482   //         .kmp_privates_t. privates;
4483   //       };
4484   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4485   RD->startDefinition();
4486   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4487   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4488     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4489   RD->completeDefinition();
4490   return RD;
4491 }
4492 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
/// \param Kind Directive kind; taskloop directives get the extra bound/stride
/// arguments.
/// \param TaskFunction The outlined task body this proxy forwards to.
/// \param TaskPrivatesMap The mapping function for private variables (may be
/// a null constant when there are none).
/// \return The generated internal-linkage entry function.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Build the (gtid, task) parameter list the runtime expects for a task
  // entry point.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference the task argument; 'Base' below is the embedded kmp_task_t
  // (the first field of kmp_task_t_with_privates).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by pointer so the task body can update it.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field exists only when the task has privatized variables;
  // otherwise pass a null pointer.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally receive the loop bounds, stride, last-iteration
  // flag and the reductions pointer, loaded from the kmp_task_t fields.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The runtime's task entry contract: always return 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4607 
/// Emit a function that runs the destructors of the privatized variables
/// embedded in a kmp_task_t_with_privates instance. It has the same
/// (gtid, task) signature as the task entry so the runtime can invoke it,
/// and iterates over the fields of the privates record, destroying each one
/// that has a non-trivial destruction kind.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // Same (gtid, task) parameter list as the task entry function.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Dereference the task argument, then step to the second field of
  // kmp_task_t_with_privates: the privates record.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Schedule a destroy for every private field that needs one; the cleanups
  // are emitted when the function finishes.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
4656 
4657 /// Emit a privates mapping function for correct handling of private and
4658 /// firstprivate variables.
4659 /// \code
4660 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4661 /// **noalias priv1,...,  <tyn> **noalias privn) {
4662 ///   *priv1 = &.privates.priv1;
4663 ///   ...;
4664 ///   *privn = &.privates.privn;
4665 /// }
4666 /// \endcode
4667 static llvm::Value *
4668 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4669                                ArrayRef<const Expr *> PrivateVars,
4670                                ArrayRef<const Expr *> FirstprivateVars,
4671                                ArrayRef<const Expr *> LastprivateVars,
4672                                QualType PrivatesQTy,
4673                                ArrayRef<PrivateDataTy> Privates) {
4674   ASTContext &C = CGM.getContext();
4675   FunctionArgList Args;
4676   ImplicitParamDecl TaskPrivatesArg(
4677       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4678       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4679       ImplicitParamDecl::Other);
4680   Args.push_back(&TaskPrivatesArg);
4681   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4682   unsigned Counter = 1;
4683   for (const Expr *E : PrivateVars) {
4684     Args.push_back(ImplicitParamDecl::Create(
4685         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4686         C.getPointerType(C.getPointerType(E->getType()))
4687             .withConst()
4688             .withRestrict(),
4689         ImplicitParamDecl::Other));
4690     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4691     PrivateVarsPos[VD] = Counter;
4692     ++Counter;
4693   }
4694   for (const Expr *E : FirstprivateVars) {
4695     Args.push_back(ImplicitParamDecl::Create(
4696         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4697         C.getPointerType(C.getPointerType(E->getType()))
4698             .withConst()
4699             .withRestrict(),
4700         ImplicitParamDecl::Other));
4701     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4702     PrivateVarsPos[VD] = Counter;
4703     ++Counter;
4704   }
4705   for (const Expr *E : LastprivateVars) {
4706     Args.push_back(ImplicitParamDecl::Create(
4707         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4708         C.getPointerType(C.getPointerType(E->getType()))
4709             .withConst()
4710             .withRestrict(),
4711         ImplicitParamDecl::Other));
4712     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4713     PrivateVarsPos[VD] = Counter;
4714     ++Counter;
4715   }
4716   const auto &TaskPrivatesMapFnInfo =
4717       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4718   llvm::FunctionType *TaskPrivatesMapTy =
4719       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4720   std::string Name =
4721       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4722   auto *TaskPrivatesMap = llvm::Function::Create(
4723       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4724       &CGM.getModule());
4725   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4726                                     TaskPrivatesMapFnInfo);
4727   if (CGM.getLangOpts().Optimize) {
4728     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4729     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4730     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4731   }
4732   CodeGenFunction CGF(CGM);
4733   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4734                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4735 
4736   // *privi = &.privates.privi;
4737   LValue Base = CGF.EmitLoadOfPointerLValue(
4738       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4739       TaskPrivatesArg.getType()->castAs<PointerType>());
4740   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4741   Counter = 0;
4742   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4743     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4744     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4745     LValue RefLVal =
4746         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4747     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4748         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
4749     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
4750     ++Counter;
4751   }
4752   CGF.FinishFunction();
4753   return TaskPrivatesMap;
4754 }
4755 
/// Emit initialization for private variables in task-based directives.
///
/// \param KmpTaskSharedsPtr Pointer to the 'shareds' block of the (source)
///        task; may be Address::invalid() when no copy from shareds is needed.
/// \param TDBase Lvalue of the kmp_task_t_with_privates record being filled.
/// \param Privates Descriptors of the private copies, in the same order as
///        the fields of the generated .privates. record.
/// \param ForDup true when emitting the body of the taskloop 'dup' function,
///        false when initializing a freshly allocated task.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The .privates. record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the fields of the .privates. record in lock-step with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function only non-trivial constructor initializers are
    // re-emitted; everything else was handled at task creation time.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: initialize the copy from the shared original.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the original value out of the source task's shareds block,
          // using the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else {
          // Emit the lvalue of the original reference inside a dummy inlined
          // region so the capture machinery resolves it correctly.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue =  CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: bind the init helper variable to the
          // shared value, then emit the initializer expression.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Private/lastprivate copy: just emit its own initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
4867 
4868 /// Check if duplication function is required for taskloops.
4869 static bool checkInitIsRequired(CodeGenFunction &CGF,
4870                                 ArrayRef<PrivateDataTy> Privates) {
4871   bool InitRequired = false;
4872   for (const PrivateDataTy &Pair : Privates) {
4873     const VarDecl *VD = Pair.second.PrivateCopy;
4874     const Expr *Init = VD->getAnyInitializer();
4875     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4876                                     !CGF.isTrivialInitializer(Init));
4877     if (InitRequired)
4878       break;
4879   }
4880   return InitRequired;
4881 }
4882 
4883 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Signature: void(kmp_task_t_with_privates *dst,
  //                 kmp_task_t_with_privates *src, int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Load the shareds pointer from the *source* task only when firstprivates
  // have to be copied out of it.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4962 
4963 /// Checks if destructor function is required to be generated.
4964 /// \return true if cleanups are required, false otherwise.
4965 static bool
4966 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4967   bool NeedsCleanup = false;
4968   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4969   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4970   for (const FieldDecl *FD : PrivateRD->fields()) {
4971     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4972     if (NeedsCleanup)
4973       break;
4974   }
4975   return NeedsCleanup;
4976 }
4977 
/// Builds the kmp_task_t_with_privates record for a task-based directive,
/// allocates the task through the OpenMP runtime, copies shareds, emits
/// initializers for private copies and wires up the destructor, priority and
/// (for taskloop) task-dup helper functions.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally record the element-init helper variable used
  // when emitting the copy-initialization.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Sort by decreasing alignment to minimize padding in the privates record;
  // stable sort keeps declaration order for equal alignments.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop uses a different
  // record layout, so the two variants are cached separately.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The expected type of the privates-map function is taken from the 4th
  // parameter of the outlined task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null map function pointer.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // 'final' may be a runtime expression (select) or a compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // 'nowait' target tasks go through the target-specific allocation entry
  // point, which additionally takes a device ID.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloop additionally needs a 'dup' function when there are
    // lastprivates or privates with non-trivial initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
5197 
namespace {
/// Dependence kind for RTL.
/// Flag values stored in the 'flags' field of kmp_depend_info; note that
/// 'out' dependencies are encoded as DepInOut (see translateDependencyKind).
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
/// Order matches the fields built in getDependTypes: base address, length,
/// flags.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
5208 
5209 /// Translates internal dependency kind into the runtime kind.
5210 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
5211   RTLDependenceKindTy DepKind;
5212   switch (K) {
5213   case OMPC_DEPEND_in:
5214     DepKind = DepIn;
5215     break;
5216   // Out and InOut dependencies must use the same code.
5217   case OMPC_DEPEND_out:
5218   case OMPC_DEPEND_inout:
5219     DepKind = DepInOut;
5220     break;
5221   case OMPC_DEPEND_mutexinoutset:
5222     DepKind = DepMutexInOutSet;
5223     break;
5224   case OMPC_DEPEND_source:
5225   case OMPC_DEPEND_sink:
5226   case OMPC_DEPEND_depobj:
5227   case OMPC_DEPEND_unknown:
5228     llvm_unreachable("Unknown task dependence type");
5229   }
5230   return DepKind;
5231 }
5232 
5233 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
5234 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
5235                            QualType &FlagsTy) {
5236   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5237   if (KmpDependInfoTy.isNull()) {
5238     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5239     KmpDependInfoRD->startDefinition();
5240     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5241     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5242     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5243     KmpDependInfoRD->completeDefinition();
5244     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5245   }
5246 }
5247 
/// Returns the number of dependencies stored in a depobj and an lvalue for
/// the first kmp_depend_info element of its dependency list.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable holds a void* pointing at the dependency array;
  // load it and re-type it as kmp_depend_info*.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // The element at index -1 (just before the visible list) holds the number
  // of dependencies; emitDependClause reserves this extra slot when the
  // depobj array is allocated.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
5276 
/// Emits the kmp_depend_info array for the items of the 'depend' clause(s)
/// and returns the number of elements paired with a void*-typed address of
/// the array (invalid address and null count when the list is empty).
/// If \p ForDepobj is true, the array is allocated dynamically via
/// __kmpc_alloc with one extra leading element that records the number of
/// dependencies; that hidden counter is what makes 'depobj(x) update(in)'
/// and 'depobj(x) destroy' implementable later.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF,
    ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependencies,
    bool ForDepobj, SourceLocation Loc) {
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.size();
  llvm::Value *NumOfElements = nullptr;
  if (NumDependencies) {
    QualType FlagsTy;
    getDependTypes(C, KmpDependInfoTy, FlagsTy);
    RecordDecl *KmpDependInfoRD =
        cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    unsigned NumDepobjDependecies = 0;
    SmallVector<std::pair<llvm::Value *, LValue>, 4> Depobjs;
    llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
    // Calculate number of depobj dependecies.
    // Each depobj item expands into a runtime-known number of elements,
    // read from the depobj's hidden counter via getDepobjElements(); the
    // (count, base) pairs are remembered so their contents can be copied
    // into the final array at the end.
    for (const std::pair<OpenMPDependClauseKind, const Expr *> &Pair :
         Dependencies) {
      if (Pair.first != OMPC_DEPEND_depobj)
        continue;
      LValue DepobjLVal = CGF.EmitLValue(Pair.second);
      llvm::Value *NumDeps;
      LValue Base;
      std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
      NumOfDepobjElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumDeps);
      Depobjs.emplace_back(NumDeps, Base);
      ++NumDepobjDependecies;
    }

    QualType KmpDependInfoArrayTy;
    // Define type kmp_depend_info[<Dependencies.size()>];
    // For depobj reserve one extra element to store the number of elements.
    // It is required to handle depobj(x) update(in) construct.
    // kmp_depend_info[<Dependencies.size()>] deps;
    if (ForDepobj) {
      assert(NumDepobjDependecies == 0 &&
             "depobj dependency kind is not expected in depobj directive.");
      KmpDependInfoArrayTy = C.getConstantArrayType(
          KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
          nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
      // Need to allocate on the dynamic memory.
      llvm::Value *ThreadID = getThreadID(CGF, Loc);
      // Use default allocator.
      llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      CharUnits Align = C.getTypeAlignInChars(KmpDependInfoArrayTy);
      CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
      llvm::Value *Size = CGF.CGM.getSize(Sz.alignTo(Align));
      llvm::Value *Args[] = {ThreadID, Size, Allocator};

      llvm::Value *Addr = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_alloc), Args, ".dep.arr.addr");
      Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Addr, CGF.ConvertTypeForMem(KmpDependInfoArrayTy)->getPointerTo());
      DependenciesArray = Address(Addr, Align);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                             /*isSigned=*/false);
    } else if (NumDepobjDependecies > 0) {
      // Total element count is only known at runtime (depobj sizes plus the
      // statically-known regular dependencies), so build a VLA.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          NumOfDepobjElements,
          llvm::ConstantInt::get(CGM.IntPtrTy,
                                 NumDependencies - NumDepobjDependecies,
                                 /*isSigned=*/false));
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
      // Wrap the runtime count in an OpaqueValueExpr so it can serve as the
      // size expression of the variable array type below.
      OpaqueValueExpr OVE(
          Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpDependInfoArrayTy =
          C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    } else {
      // Simple case: a fixed-size local array on the stack.
      KmpDependInfoArrayTy = C.getConstantArrayType(
          KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
          nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
      DependenciesArray =
          CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                             /*isSigned=*/false);
    }
    if (ForDepobj) {
      // Write number of elements in the first element of array for depobj.
      llvm::Value *NumVal =
          llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0),
          KmpDependInfoTy);
      // deps[i].base_addr = NumDependencies;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(NumVal, BaseAddrLVal);
    }
    // Fill in the regular (non-depobj) dependencies; depobj contents are
    // bulk-copied afterwards. Skip slot 0 when it holds the depobj counter.
    unsigned Pos = ForDepobj ? 1 : 0;
    for (unsigned I = 0; I < NumDependencies; ++I) {
      if (Dependencies[I].first == OMPC_DEPEND_depobj)
        continue;
      const Expr *E = Dependencies[I].second;
      LValue Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (const auto *ASE =
              dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: size is the byte distance from the lower bound to
        // one element past the upper bound.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
        llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
            UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base;
      if (NumDepobjDependecies > 0) {
        // VLA-backed storage: use a plain (non-array) GEP.
        Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateConstGEP(DependenciesArray, Pos),
            KmpDependInfoTy);
      } else {
        Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateConstArrayGEP(DependenciesArray, Pos),
            KmpDependInfoTy);
      }
      // deps[i].base_addr = &<Dependencies[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependencies[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependencies[i].first>;
      RTLDependenceKindTy DepKind =
          translateDependencyKind(Dependencies[I].first);
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
      ++Pos;
    }
    // Copy final depobj arrays.
    // Each remembered depobj array is memcpy'ed after the regular entries;
    // Addr advances by the (runtime) element count of the copied array.
    if (NumDepobjDependecies > 0) {
      llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
      Address Addr = CGF.Builder.CreateConstGEP(DependenciesArray, Pos);
      for (const std::pair<llvm::Value *, LValue> &Pair : Depobjs) {
        llvm::Value *Size = CGF.Builder.CreateNUWMul(ElSize, Pair.first);
        CGF.Builder.CreateMemCpy(Addr, Pair.second.getAddress(CGF), Size);
        Addr =
            Address(CGF.Builder.CreateGEP(
                        Addr.getElementType(), Addr.getPointer(), Pair.first),
                    DependenciesArray.getAlignment().alignmentOfArrayElement(
                        C.getTypeSizeInChars(KmpDependInfoTy)));
      }
      DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          DependenciesArray, CGF.VoidPtrTy);
    } else {
      // Return the address past the hidden counter element (if any) as void*.
      DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, ForDepobj ? 1 : 0),
          CGF.VoidPtrTy);
    }
  }
  return std::make_pair(NumOfElements, DependenciesArray);
}
5453 
5454 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5455                                         SourceLocation Loc) {
5456   ASTContext &C = CGM.getContext();
5457   QualType FlagsTy;
5458   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5459   LValue Base = CGF.EmitLoadOfPointerLValue(
5460       DepobjLVal.getAddress(CGF),
5461       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5462   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5463   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5464       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5465   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5466       Addr.getPointer(),
5467       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5468   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5469                                                                CGF.VoidPtrTy);
5470   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5471   // Use default allocator.
5472   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5473   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5474 
5475   // _kmpc_free(gtid, addr, nullptr);
5476   (void)CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_free), Args);
5477 }
5478 
/// Emits a loop that rewrites the dependency kind of every element stored
/// in a depobj's depend array to \p NewDepKind (implements
/// 'depobj(x) update(<kind>)').
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  // Base points at the first real element; NumDeps is the runtime count read
  // from the depobj's hidden counter slot.
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  // Note: control falls straight into the body, so the loop executes at
  // least once — assumes NumDeps >= 1 for a valid depobj (TODO confirm).
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the current element pointer: Begin on entry, advanced value on
  // the back edge (second incoming added below).
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5524 
/// Emits code for a task directive: allocates and initializes the task via
/// emitTaskInit, materializes the dependence array (if any), then either
/// enqueues the task with __kmpc_omp_task[_with_deps] or — when \p IfCond
/// evaluates to false — runs it immediately as an undeferred task bracketed
/// by __kmpc_omp_task_begin_if0/__kmpc_omp_task_complete_if0.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, /*ForDepobj=*/false, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'then' branch (if clause true or absent): enqueue the task with the
  // runtime, with or without dependences.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    // Untied tasks start from part id 0; the field must be reset explicitly.
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'else' branch (if clause false): wait for the dependences, then execute
  // the task body inline as an undeferred task.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5635 
/// Emits code for a taskloop directive: initializes the task object, fills
/// in its lower-bound/upper-bound/stride/reductions fields, and issues the
/// __kmpc_taskloop runtime call.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // if_val: the evaluated 'if' clause condition, or 1 when no clause is given.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the task's lb/ub/st fields from the loop directive's bound
  // and stride variables' initializers.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Scheduling mode for __kmpc_taskloop: Data.Schedule's pointer carries the
  // grainsize/num_tasks expression and its int flag selects which of the two.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}
5719 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded verbatim to
/// \p RedOpGen (used by atomic reduction emission; null by default).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  // Guard up front against an empty array: jump straight to DoneBB.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // Two PHIs track the current source/destination element pointers; the
  // back-edge incoming values are added after the increment below.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current elements so RedOpGen's
  // expressions operate on a single pair of elements per iteration.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5799 
5800 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5801 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5802 /// UDR combiner function.
5803 static void emitReductionCombiner(CodeGenFunction &CGF,
5804                                   const Expr *ReductionOp) {
5805   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5806     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5807       if (const auto *DRE =
5808               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5809         if (const auto *DRD =
5810                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5811           std::pair<llvm::Function *, llvm::Function *> Reduction =
5812               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5813           RValue Func = RValue::get(Reduction.first);
5814           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5815           CGF.EmitIgnoredExpr(ReductionOp);
5816           return;
5817         }
5818   CGF.EmitIgnoredExpr(ReductionOp);
5819 }
5820 
/// Emits the interprocedural reduction function
///   void reduction_func(void *LHSArg, void *RHSArg);
/// that combines each element of the RHS (private copies) array into the
/// corresponding LHS element by applying the matching reduction operation.
/// Both arguments are void*[n] arrays of element addresses; for variably
/// modified private types an extra slot after the element pointer carries
/// the array size.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS variable to the address stored in the corresponding
  // array slot so the reduction expressions operate on the passed-in data.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The size occupies the next slot of the array (hence the extra ++Idx)
      // and is bound to the VLA's opaque size expression.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit the combiner for each reduction item under the privatized mapping.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5912 
5913 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5914                                                   const Expr *ReductionOp,
5915                                                   const Expr *PrivateRef,
5916                                                   const DeclRefExpr *LHS,
5917                                                   const DeclRefExpr *RHS) {
5918   if (PrivateRef->getType()->isArrayType()) {
5919     // Emit reduction for array section.
5920     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5921     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5922     EmitOMPAggregateReduction(
5923         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5924         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5925           emitReductionCombiner(CGF, ReductionOp);
5926         });
5927   } else {
5928     // Emit reduction for array subscript or single variable.
5929     emitReductionCombiner(CGF, ReductionOp);
5930   }
5931 }
5932 
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Simple (e.g. serial) case: apply each combiner inline, no runtime calls.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    // RedList[Idx] = (void *)&RHSExprs[I];
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size. The VLA element count is smuggled through the void*
      // slot right after the item pointer via inttoptr.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The exit action of the region emits the matching __kmpc_end_reduce call
  // after all combiners have run.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Pattern-match "X = <update>" so it can be lowered to an atomic RMW
      // instead of a critical section.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback path: materialize the loaded value of X into a
                // temporary privatized as VD so UpExpr can be re-emitted.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
6236 
6237 /// Generates unique name for artificial threadprivate variables.
6238 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
6239 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
6240                                       const Expr *Ref) {
6241   SmallString<256> Buffer;
6242   llvm::raw_svector_ostream Out(Buffer);
6243   const clang::DeclRefExpr *DE;
6244   const VarDecl *D = ::getBaseDecl(Ref, DE);
6245   if (!D)
6246     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
6247   D = D->getCanonicalDecl();
6248   std::string Name = CGM.getOpenMPRuntime().getName(
6249       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
6250   Out << Prefix << Name << "_"
6251       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
6252   return std::string(Out.str());
6253 }
6254 
6255 /// Emits reduction initializer function:
6256 /// \code
6257 /// void @.red_init(void* %arg) {
6258 /// %0 = bitcast void* %arg to <type>*
6259 /// store <type> <init>, <type>* %0
6260 /// ret void
6261 /// }
6262 /// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Single void* argument: pointer to the private copy to initialize.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable (stored there by emitTaskReductionFixups).
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer). Otherwise a null lvalue is passed, since a
  // default initializer does not reference the original item.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
6321 
6322 /// Emits reduction combiner function:
6323 /// \code
6324 /// void @.red_comb(void* %arg0, void* %arg1) {
6325 /// %lhs = bitcast void* %arg0 to <type>*
6326 /// %rhs = bitcast void* %arg1 to <type>*
6327 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
6328 /// store <type> %2, <type>* %lhs
6329 /// ret void
6330 /// }
6331 /// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  // Two void* arguments: in-out (lhs, accumulator) and in (rhs).
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6399 
6400 /// Emits reduction finalizer function:
6401 /// \code
6402 /// void @.red_fini(void* %arg) {
6403 /// %0 = bitcast void* %arg to <type>*
6404 /// <destroy>(<type>* %0)
6405 /// ret void
6406 /// }
6407 /// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is needed (and nullptr is stored in the input record) when
  // the reduction item requires no cleanups.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  // Single void* argument: pointer to the private copy to destroy.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6448 
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  // Fill one kmp_task_red_input_t element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    // ElemLVal.reduce_size = sizeof(item);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini; nullptr when no cleanups are needed.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0; flag 1 requests delayed creation (see above).
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}
6554 
6555 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6556                                               SourceLocation Loc,
6557                                               ReductionCodeGen &RCG,
6558                                               unsigned N) {
6559   auto Sizes = RCG.getSizes(N);
6560   // Emit threadprivate global variable if the type is non-constant
6561   // (Sizes.second = nullptr).
6562   if (Sizes.second) {
6563     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6564                                                      /*isSigned=*/false);
6565     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6566         CGF, CGM.getContext().getSizeType(),
6567         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6568     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6569   }
6570   // Store address of the original reduction item if custom initializer is used.
6571   if (RCG.usesReductionInitializer(N)) {
6572     Address SharedAddr = getAddrOfArtificialThreadPrivate(
6573         CGF, CGM.getContext().VoidPtrTy,
6574         generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6575     CGF.Builder.CreateStore(
6576         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6577             RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy),
6578         SharedAddr, /*IsVolatile=*/false);
6579   }
6580 }
6581 
6582 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6583                                               SourceLocation Loc,
6584                                               llvm::Value *ReductionsPtr,
6585                                               LValue SharedLVal) {
6586   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6587   // *d);
6588   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6589                                                    CGM.IntTy,
6590                                                    /*isSigned=*/true),
6591                          ReductionsPtr,
6592                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6593                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6594   return Address(
6595       CGF.EmitRuntimeCall(
6596           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6597       SharedLVal.getAlignment());
6598 }
6599 
6600 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6601                                        SourceLocation Loc) {
6602   if (!CGF.HaveInsertPoint())
6603     return;
6604 
6605   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
6606   if (OMPBuilder) {
6607     OMPBuilder->CreateTaskwait(CGF.Builder);
6608   } else {
6609     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6610     // global_tid);
6611     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6612     // Ignore return result until untied tasks are supported.
6613     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6614   }
6615 
6616   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6617     Region->emitUntiedSwitch(CGF);
6618 }
6619 
/// Emit the body of the directive \p InnerKind inlined into the current
/// function rather than outlined into a separate one.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // The RAII object installs an inlined-region CapturedStmtInfo for the
  // duration of this scope; EmitBody then runs \p CodeGen through it.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
6629 
namespace {
/// Cancellation-kind codes passed as the cncl_kind argument of
/// __kmpc_cancel/__kmpc_cancellationpoint.
/// NOTE(review): these numeric values appear to mirror the runtime library's
/// cancel-kind encoding -- confirm against the libomp headers before changing.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
6639 
6640 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6641   RTCancelKind CancelKind = CancelNoreq;
6642   if (CancelRegion == OMPD_parallel)
6643     CancelKind = CancelParallel;
6644   else if (CancelRegion == OMPD_for)
6645     CancelKind = CancelLoop;
6646   else if (CancelRegion == OMPD_sections)
6647     CancelKind = CancelSections;
6648   else {
6649     assert(CancelRegion == OMPD_taskgroup);
6650     CancelKind = CancelTaskgroup;
6651   }
6652   return CancelKind;
6653 }
6654 
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      // A non-zero result means cancellation was requested: branch to the
      // enclosing region's cancel destination through any active cleanups.
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6689 
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // ThenGen emits the actual cancel call; it is run unconditionally, or
    // under the 'if' clause condition when one is present.
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      // A non-zero result means cancellation was requested: branch to the
      // enclosing region's cancel destination through any active cleanups.
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Guard the cancel call with the 'if' clause condition; the else
      // branch intentionally emits nothing.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6731 
/// Outline a target region: record that this module emits at least one
/// target region and delegate to emitTargetOutlinedFunctionHelper.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  // Note: the flag is set before delegating; it is consumed elsewhere in
  // this class.
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
6741 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured statement of the target region into a function with
  // the name built above.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host side: the ID is a dedicated unique global, not the function itself
    // (see the comment above).
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6808 
6809 /// Checks if the expression is constant or does not have non-trivial function
6810 /// calls.
6811 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6812   // We can skip constant expressions.
6813   // We can skip expressions with trivial calls or simple expressions.
6814   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6815           !E->hasNonTrivialCall(Ctx)) &&
6816          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6817 }
6818 
/// Look through \p Body (and nested compound statements) for the single
/// "interesting" child statement, skipping trivial expressions, ignorable
/// statements and trivial declarations. Returns nullptr if more than one
/// non-ignorable child is found.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    // Scan this compound statement; Child accumulates the one candidate.
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              // These declaration kinds generate no interesting code.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Variables are ignorable when constexpr, or of trivial or
              // reference type with a trivial (or absent) initializer.
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Descend into the single child found (if any) and repeat.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6863 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target': look for a single directive nested in the captured
    // body to decide how many teams to launch.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Nested teams with an explicit num_teams clause: emit its value.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams without num_teams: 0 lets the runtime choose.
        return Bld.getInt32(0);
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams directives: the num_teams clause, if any, is on
    // the directive itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct: exactly one team.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    // Non-target directives must not reach here (guarded by the assert
    // above); fall through to the unreachable.
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6991 
/// Compute the number of threads for the parallel region nested directly in
/// the captured statement \p CS, clamping to \p DefaultThreadLimitVal when
/// that is non-null. Returns nullptr when no decision can be made here.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the 'if' clause that applies to the parallel region (either
        // unmodified or with the 'parallel' name modifier).
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Condition is a compile-time constant: a false 'if' means the
            // region runs with a single thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any declarations the condition's pre-init statement needs.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit any declarations the clause's pre-init statement needs.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads to the thread limit: min(limit, num_threads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  // No nested directive: fall back to the supplied limit, or 0 ("runtime
  // default") when none was given.
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
7083 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target': look for a nested parallel/teams directive that
    // determines the thread count.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        // Nested directive carries a thread_limit clause: evaluate it.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit any declarations the clause's pre-init statement needs.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Descend through a teams (non-distribute) directive to inspect its
      // own nested child.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Pick the 'if' clause that applies to the parallel region.
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant-false 'if' means a single thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine: min(num_threads, thread_limit) when both are present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // Simd regions execute with a single thread.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    // Non-target directives must not reach here (guarded by the assert
    // above); fall through to the unreachable.
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7303 
7304 namespace {
7305 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7306 
7307 // Utility to handle information from clauses associated with a given
7308 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7309 // It provides a convenient interface to obtain the information and generate
7310 // code for that information.
7311 class MappableExprsHandler {
7312 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. These must stay in sync with the flag values expected by
  /// the offloading runtime library (libomptarget).
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map (generated by the compiler rather than written by the
    /// user).
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class. The member position is shifted into this field by
    /// getFlagMemberOffset() bits.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7353 
7354   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7355   static unsigned getFlagMemberOffset() {
7356     unsigned Offset = 0;
7357     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7358          Remain = Remain >> 1)
7359       Offset++;
7360     return Offset;
7361   }
7362 
7363   /// Class that associates information with a base pointer to be passed to the
7364   /// runtime library.
7365   class BasePointerInfo {
7366     /// The base pointer.
7367     llvm::Value *Ptr = nullptr;
7368     /// The base declaration that refers to this device pointer, or null if
7369     /// there is none.
7370     const ValueDecl *DevPtrDecl = nullptr;
7371 
7372   public:
7373     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7374         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7375     llvm::Value *operator*() const { return Ptr; }
7376     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7377     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7378   };
7379 
7380   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7381   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7382   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7383 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Lowest-index mapped field and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Highest-index mapped field and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Base address of the partially mapped struct; Address::invalid() until
    // a partial struct is recorded.
    Address Base = Address::invalid();
  };
7395 
7396 private:
  /// Bundles all the information extracted from a single map-clause entry:
  /// the component list of the mapped expression together with the map type,
  /// its modifiers, whether the device pointer has to be returned for it
  /// (use_device_ptr), and whether the map is implicit.
  struct MapInfo {
    // Components of the mappable expression, from base to full expression.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    // Kind of the map clause (to/from/tofrom/alloc/release/delete).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    // Map-type modifiers (e.g. always, close) attached to the clause.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    // True if the runtime must return the device pointer for this entry.
    bool ReturnDevicePointer = false;
    // True if the map was generated implicitly rather than user-written.
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7414 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // Expression used to emit the address for the deferred entry —
    // presumably the innermost component expression; confirm at use site.
    const Expr *IE = nullptr;
    // Declaration of the use_device_ptr pointer being deferred.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7425 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
7428   llvm::PointerUnion<const OMPExecutableDirective *,
7429                      const OMPDeclareMapperDecl *>
7430       CurDir;
7431 
7432   /// Function the directive is being generated for.
7433   CodeGenFunction &CGF;
7434 
7435   /// Set of all first private variables in the current directive.
7436   /// bool data is set to true if the variable is implicitly marked as
7437   /// firstprivate, false otherwise.
7438   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7439 
7440   /// Map between device pointer declarations and their expression components.
7441   /// The key value for declarations in 'this' is null.
7442   llvm::DenseMap<
7443       const ValueDecl *,
7444       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7445       DevPointersMap;
7446 
7447   llvm::Value *getExprTypeSize(const Expr *E) const {
7448     QualType ExprTy = E->getType().getCanonicalType();
7449 
7450     // Reference types are ignored for mapping purposes.
7451     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7452       ExprTy = RefTy->getPointeeType().getCanonicalType();
7453 
7454     // Given that an array section is considered a built-in type, we need to
7455     // do the calculation based on the length of the section instead of relying
7456     // on CGF.getTypeSize(E->getType()).
7457     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7458       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7459                             OAE->getBase()->IgnoreParenImpCasts())
7460                             .getCanonicalType();
7461 
7462       // If there is no length associated with the expression and lower bound is
7463       // not specified too, that means we are using the whole length of the
7464       // base.
7465       if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
7466           !OAE->getLowerBound())
7467         return CGF.getTypeSize(BaseTy);
7468 
7469       llvm::Value *ElemSize;
7470       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7471         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7472       } else {
7473         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7474         assert(ATy && "Expecting array type if not a pointer type.");
7475         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7476       }
7477 
7478       // If we don't have a length at this point, that is because we have an
7479       // array section with a single element.
7480       if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
7481         return ElemSize;
7482 
7483       if (const Expr *LenExpr = OAE->getLength()) {
7484         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7485         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7486                                              CGF.getContext().getSizeType(),
7487                                              LenExpr->getExprLoc());
7488         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7489       }
7490       assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
7491              OAE->getLowerBound() && "expected array_section[lb:].");
7492       // Size = sizetype - lb * elemtype;
7493       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7494       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7495       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7496                                        CGF.getContext().getSizeType(),
7497                                        OAE->getLowerBound()->getExprLoc());
7498       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7499       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7500       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7501       LengthVal = CGF.Builder.CreateSelect(
7502           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7503       return LengthVal;
7504     }
7505     return CGF.getTypeSize(ExprTy);
7506   }
7507 
7508   /// Return the corresponding bits for a given map clause modifier. Add
7509   /// a flag marking the map as a pointer if requested. Add a flag marking the
7510   /// map as the first one of a series of maps that relate to the same map
7511   /// expression.
7512   OpenMPOffloadMappingFlags getMapTypeBits(
7513       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7514       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7515     OpenMPOffloadMappingFlags Bits =
7516         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7517     switch (MapType) {
7518     case OMPC_MAP_alloc:
7519     case OMPC_MAP_release:
7520       // alloc and release is the default behavior in the runtime library,  i.e.
7521       // if we don't pass any bits alloc/release that is what the runtime is
7522       // going to do. Therefore, we don't need to signal anything for these two
7523       // type modifiers.
7524       break;
7525     case OMPC_MAP_to:
7526       Bits |= OMP_MAP_TO;
7527       break;
7528     case OMPC_MAP_from:
7529       Bits |= OMP_MAP_FROM;
7530       break;
7531     case OMPC_MAP_tofrom:
7532       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7533       break;
7534     case OMPC_MAP_delete:
7535       Bits |= OMP_MAP_DELETE;
7536       break;
7537     case OMPC_MAP_unknown:
7538       llvm_unreachable("Unexpected map type!");
7539     }
7540     if (AddPtrFlag)
7541       Bits |= OMP_MAP_PTR_AND_OBJ;
7542     if (AddIsTargetParamFlag)
7543       Bits |= OMP_MAP_TARGET_PARAM;
7544     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7545         != MapModifiers.end())
7546       Bits |= OMP_MAP_ALWAYS;
7547     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7548         != MapModifiers.end())
7549       Bits |= OMP_MAP_CLOSE;
7550     return Bits;
7551   }
7552 
7553   /// Return true if the provided expression is a final array section. A
7554   /// final array section, is one whose length can't be proved to be one.
7555   bool isFinalArraySectionExpression(const Expr *E) const {
7556     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7557 
7558     // It is not an array section and therefore not a unity-size one.
7559     if (!OASE)
7560       return false;
7561 
7562     // An array section with no colon always refer to a single element.
7563     if (OASE->getColonLoc().isInvalid())
7564       return false;
7565 
7566     const Expr *Length = OASE->getLength();
7567 
7568     // If we don't have a length we have to check if the array has size 1
7569     // for this dimension. Also, we should always expect a length if the
7570     // base type is pointer.
7571     if (!Length) {
7572       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7573                              OASE->getBase()->IgnoreParenImpCasts())
7574                              .getCanonicalType();
7575       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7576         return ATy->getSize().getSExtValue() != 1;
7577       // If we don't have a constant dimension length, we have to consider
7578       // the current section as having any size, so it is not necessarily
7579       // unitary. If it happen to be unity size, that's user fault.
7580       return true;
7581     }
7582 
7583     // Check if the length evaluates to 1.
7584     Expr::EvalResult Result;
7585     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7586       return true; // Can have more that size 1.
7587 
7588     llvm::APSInt ConstLength = Result.Val.getInt();
7589     return ConstLength.getSExtValue() != 1;
7590   }
7591 
7592   /// Generate the base pointers, section pointers, sizes and map type
7593   /// bits for the provided map type, map modifier, and expression components.
7594   /// \a IsFirstComponent should be set to true if the provided set of
7595   /// components is the first associated with a capture.
7596   void generateInfoForComponentList(
7597       OpenMPMapClauseKind MapType,
7598       ArrayRef<OpenMPMapModifierKind> MapModifiers,
7599       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7600       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7601       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7602       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7603       bool IsImplicit,
7604       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7605           OverlappedElements = llvm::None) const {
7606     // The following summarizes what has to be generated for each map and the
7607     // types below. The generated information is expressed in this order:
7608     // base pointer, section pointer, size, flags
7609     // (to add to the ones that come from the map type and modifier).
7610     //
7611     // double d;
7612     // int i[100];
7613     // float *p;
7614     //
7615     // struct S1 {
7616     //   int i;
7617     //   float f[50];
7618     // }
7619     // struct S2 {
7620     //   int i;
7621     //   float f[50];
7622     //   S1 s;
7623     //   double *p;
7624     //   struct S2 *ps;
7625     // }
7626     // S2 s;
7627     // S2 *ps;
7628     //
7629     // map(d)
7630     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7631     //
7632     // map(i)
7633     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7634     //
7635     // map(i[1:23])
7636     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7637     //
7638     // map(p)
7639     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7640     //
7641     // map(p[1:24])
7642     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7643     //
7644     // map(s)
7645     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7646     //
7647     // map(s.i)
7648     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7649     //
7650     // map(s.s.f)
7651     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7652     //
7653     // map(s.p)
7654     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7655     //
7656     // map(to: s.p[:22])
7657     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7658     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7659     // &(s.p), &(s.p[0]), 22*sizeof(double),
7660     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7661     // (*) alloc space for struct members, only this is a target parameter
7662     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7663     //      optimizes this entry out, same in the examples below)
7664     // (***) map the pointee (map: to)
7665     //
7666     // map(s.ps)
7667     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7668     //
7669     // map(from: s.ps->s.i)
7670     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7671     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7672     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7673     //
7674     // map(to: s.ps->ps)
7675     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7676     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7677     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7678     //
7679     // map(s.ps->ps->ps)
7680     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7681     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7682     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7683     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7684     //
7685     // map(to: s.ps->ps->s.f[:22])
7686     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7687     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7688     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7689     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7690     //
7691     // map(ps)
7692     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7693     //
7694     // map(ps->i)
7695     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7696     //
7697     // map(ps->s.f)
7698     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7699     //
7700     // map(from: ps->p)
7701     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7702     //
7703     // map(to: ps->p[:22])
7704     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7705     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7706     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7707     //
7708     // map(ps->ps)
7709     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7710     //
7711     // map(from: ps->ps->s.i)
7712     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7713     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7714     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7715     //
7716     // map(from: ps->ps->ps)
7717     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7718     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7719     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7720     //
7721     // map(ps->ps->ps->ps)
7722     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7723     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7724     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7725     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7726     //
7727     // map(to: ps->ps->ps->s.f[:22])
7728     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7729     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7730     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7731     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7732     //
7733     // map(to: s.f[:22]) map(from: s.p[:33])
7734     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7735     //     sizeof(double*) (**), TARGET_PARAM
7736     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7737     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7738     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7739     // (*) allocate contiguous space needed to fit all mapped members even if
7740     //     we allocate space for members not mapped (in this example,
7741     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7742     //     them as well because they fall between &s.f[0] and &s.p)
7743     //
7744     // map(from: s.f[:22]) map(to: ps->p[:33])
7745     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7746     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7747     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7748     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7749     // (*) the struct this entry pertains to is the 2nd element in the list of
7750     //     arguments, hence MEMBER_OF(2)
7751     //
7752     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7753     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7754     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7755     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7756     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7757     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7758     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7759     // (*) the struct this entry pertains to is the 4th element in the list
7760     //     of arguments, hence MEMBER_OF(4)
7761 
7762     // Track if the map information being generated is the first for a capture.
7763     bool IsCaptureFirstInfo = IsFirstComponentList;
7764     // When the variable is on a declare target link or in a to clause with
7765     // unified memory, a reference is needed to hold the host/device address
7766     // of the variable.
7767     bool RequiresReference = false;
7768 
7769     // Scan the components from the base to the complete expression.
7770     auto CI = Components.rbegin();
7771     auto CE = Components.rend();
7772     auto I = CI;
7773 
7774     // Track if the map information being generated is the first for a list of
7775     // components.
7776     bool IsExpressionFirstInfo = true;
7777     Address BP = Address::invalid();
7778     const Expr *AssocExpr = I->getAssociatedExpression();
7779     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7780     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7781 
7782     if (isa<MemberExpr>(AssocExpr)) {
7783       // The base is the 'this' pointer. The content of the pointer is going
7784       // to be the base of the field being mapped.
7785       BP = CGF.LoadCXXThisAddress();
7786     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7787                (OASE &&
7788                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7789       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7790     } else {
7791       // The base is the reference to the variable.
7792       // BP = &Var.
7793       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7794       if (const auto *VD =
7795               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7796         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7797                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7798           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7799               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7800                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7801             RequiresReference = true;
7802             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7803           }
7804         }
7805       }
7806 
7807       // If the variable is a pointer and is being dereferenced (i.e. is not
7808       // the last component), the base has to be the pointer itself, not its
7809       // reference. References are ignored for mapping purposes.
7810       QualType Ty =
7811           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7812       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7813         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7814 
7815         // We do not need to generate individual map information for the
7816         // pointer, it can be associated with the combined storage.
7817         ++I;
7818       }
7819     }
7820 
7821     // Track whether a component of the list should be marked as MEMBER_OF some
7822     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7823     // in a component list should be marked as MEMBER_OF, all subsequent entries
7824     // do not belong to the base struct. E.g.
7825     // struct S2 s;
7826     // s.ps->ps->ps->f[:]
7827     //   (1) (2) (3) (4)
7828     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7829     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7830     // is the pointee of ps(2) which is not member of struct s, so it should not
7831     // be marked as such (it is still PTR_AND_OBJ).
7832     // The variable is initialized to false so that PTR_AND_OBJ entries which
7833     // are not struct members are not considered (e.g. array of pointers to
7834     // data).
7835     bool ShouldBeMemberOf = false;
7836 
7837     // Variable keeping track of whether or not we have encountered a component
7838     // in the component list which is a member expression. Useful when we have a
7839     // pointer or a final array section, in which case it is the previous
7840     // component in the list which tells us whether we have a member expression.
7841     // E.g. X.f[:]
7842     // While processing the final array section "[:]" it is "f" which tells us
7843     // whether we are dealing with a member of a declared struct.
7844     const MemberExpr *EncounteredME = nullptr;
7845 
7846     for (; I != CE; ++I) {
7847       // If the current component is member of a struct (parent struct) mark it.
7848       if (!EncounteredME) {
7849         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7850         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7851         // as MEMBER_OF the parent struct.
7852         if (EncounteredME)
7853           ShouldBeMemberOf = true;
7854       }
7855 
7856       auto Next = std::next(I);
7857 
7858       // We need to generate the addresses and sizes if this is the last
7859       // component, if the component is a pointer or if it is an array section
7860       // whose length can't be proved to be one. If this is a pointer, it
7861       // becomes the base address for the following components.
7862 
7863       // A final array section, is one whose length can't be proved to be one.
7864       bool IsFinalArraySection =
7865           isFinalArraySectionExpression(I->getAssociatedExpression());
7866 
7867       // Get information on whether the element is a pointer. Have to do a
7868       // special treatment for array sections given that they are built-in
7869       // types.
7870       const auto *OASE =
7871           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7872       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7873       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7874       bool IsPointer =
7875           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7876                        .getCanonicalType()
7877                        ->isAnyPointerType()) ||
7878           I->getAssociatedExpression()->getType()->isAnyPointerType();
7879       bool IsNonDerefPointer = IsPointer && !UO && !BO;
7880 
7881       if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
7882         // If this is not the last component, we expect the pointer to be
7883         // associated with an array expression or member expression.
7884         assert((Next == CE ||
7885                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7886                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7887                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7888                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7889                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7890                "Unexpected expression");
7891 
7892         Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7893                          .getAddress(CGF);
7894 
7895         // If this component is a pointer inside the base struct then we don't
7896         // need to create any entry for it - it will be combined with the object
7897         // it is pointing to into a single PTR_AND_OBJ entry.
7898         bool IsMemberPointer =
7899             IsPointer && EncounteredME &&
7900             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7901              EncounteredME);
7902         if (!OverlappedElements.empty()) {
7903           // Handle base element with the info for overlapped elements.
7904           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7905           assert(Next == CE &&
7906                  "Expected last element for the overlapped elements.");
7907           assert(!IsPointer &&
7908                  "Unexpected base element with the pointer type.");
7909           // Mark the whole struct as the struct that requires allocation on the
7910           // device.
7911           PartialStruct.LowestElem = {0, LB};
7912           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7913               I->getAssociatedExpression()->getType());
7914           Address HB = CGF.Builder.CreateConstGEP(
7915               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7916                                                               CGF.VoidPtrTy),
7917               TypeSize.getQuantity() - 1);
7918           PartialStruct.HighestElem = {
7919               std::numeric_limits<decltype(
7920                   PartialStruct.HighestElem.first)>::max(),
7921               HB};
7922           PartialStruct.Base = BP;
7923           // Emit data for non-overlapped data.
7924           OpenMPOffloadMappingFlags Flags =
7925               OMP_MAP_MEMBER_OF |
7926               getMapTypeBits(MapType, MapModifiers, IsImplicit,
7927                              /*AddPtrFlag=*/false,
7928                              /*AddIsTargetParamFlag=*/false);
7929           LB = BP;
7930           llvm::Value *Size = nullptr;
7931           // Do bitcopy of all non-overlapped structure elements.
7932           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7933                    Component : OverlappedElements) {
7934             Address ComponentLB = Address::invalid();
7935             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7936                  Component) {
7937               if (MC.getAssociatedDeclaration()) {
7938                 ComponentLB =
7939                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7940                         .getAddress(CGF);
7941                 Size = CGF.Builder.CreatePtrDiff(
7942                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7943                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7944                 break;
7945               }
7946             }
7947             BasePointers.push_back(BP.getPointer());
7948             Pointers.push_back(LB.getPointer());
7949             Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
7950                                                       /*isSigned=*/true));
7951             Types.push_back(Flags);
7952             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7953           }
7954           BasePointers.push_back(BP.getPointer());
7955           Pointers.push_back(LB.getPointer());
7956           Size = CGF.Builder.CreatePtrDiff(
7957               CGF.EmitCastToVoidPtr(
7958                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7959               CGF.EmitCastToVoidPtr(LB.getPointer()));
7960           Sizes.push_back(
7961               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7962           Types.push_back(Flags);
7963           break;
7964         }
7965         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7966         if (!IsMemberPointer) {
7967           BasePointers.push_back(BP.getPointer());
7968           Pointers.push_back(LB.getPointer());
7969           Sizes.push_back(
7970               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7971 
7972           // We need to add a pointer flag for each map that comes from the
7973           // same expression except for the first one. We also need to signal
7974           // this map is the first one that relates with the current capture
7975           // (there is a set of entries for each capture).
7976           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7977               MapType, MapModifiers, IsImplicit,
7978               !IsExpressionFirstInfo || RequiresReference,
7979               IsCaptureFirstInfo && !RequiresReference);
7980 
7981           if (!IsExpressionFirstInfo) {
7982             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7983             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7984             if (IsPointer)
7985               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7986                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7987 
7988             if (ShouldBeMemberOf) {
7989               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7990               // should be later updated with the correct value of MEMBER_OF.
7991               Flags |= OMP_MAP_MEMBER_OF;
7992               // From now on, all subsequent PTR_AND_OBJ entries should not be
7993               // marked as MEMBER_OF.
7994               ShouldBeMemberOf = false;
7995             }
7996           }
7997 
7998           Types.push_back(Flags);
7999         }
8000 
8001         // If we have encountered a member expression so far, keep track of the
8002         // mapped member. If the parent is "*this", then the value declaration
8003         // is nullptr.
8004         if (EncounteredME) {
8005           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8006           unsigned FieldIndex = FD->getFieldIndex();
8007 
8008           // Update info about the lowest and highest elements for this struct
8009           if (!PartialStruct.Base.isValid()) {
8010             PartialStruct.LowestElem = {FieldIndex, LB};
8011             PartialStruct.HighestElem = {FieldIndex, LB};
8012             PartialStruct.Base = BP;
8013           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8014             PartialStruct.LowestElem = {FieldIndex, LB};
8015           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8016             PartialStruct.HighestElem = {FieldIndex, LB};
8017           }
8018         }
8019 
8020         // If we have a final array section, we are done with this expression.
8021         if (IsFinalArraySection)
8022           break;
8023 
8024         // The pointer becomes the base for the next element.
8025         if (Next != CE)
8026           BP = LB;
8027 
8028         IsExpressionFirstInfo = false;
8029         IsCaptureFirstInfo = false;
8030       }
8031     }
8032   }
8033 
8034   /// Return the adjusted map modifiers if the declaration a capture refers to
8035   /// appears in a first-private clause. This is expected to be used only with
8036   /// directives that start with 'target'.
8037   MappableExprsHandler::OpenMPOffloadMappingFlags
8038   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8039     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8040 
8041     // A first private variable captured by reference will use only the
8042     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8043     // declaration is known as first-private in this handler.
8044     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8045       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8046           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8047         return MappableExprsHandler::OMP_MAP_ALWAYS |
8048                MappableExprsHandler::OMP_MAP_TO;
8049       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8050         return MappableExprsHandler::OMP_MAP_TO |
8051                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8052       return MappableExprsHandler::OMP_MAP_PRIVATE |
8053              MappableExprsHandler::OMP_MAP_TO;
8054     }
8055     return MappableExprsHandler::OMP_MAP_TO |
8056            MappableExprsHandler::OMP_MAP_FROM;
8057   }
8058 
8059   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8060     // Rotate by getFlagMemberOffset() bits.
8061     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8062                                                   << getFlagMemberOffset());
8063   }
8064 
8065   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8066                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8067     // If the entry is PTR_AND_OBJ but has not been marked with the special
8068     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8069     // marked as MEMBER_OF.
8070     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8071         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8072       return;
8073 
8074     // Reset the placeholder value to prepare the flag for the assignment of the
8075     // proper MEMBER_OF value.
8076     Flags &= ~OMP_MAP_MEMBER_OF;
8077     Flags |= MemberOfFlag;
8078   }
8079 
8080   void getPlainLayout(const CXXRecordDecl *RD,
8081                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8082                       bool AsBase) const {
8083     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8084 
8085     llvm::StructType *St =
8086         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8087 
8088     unsigned NumElements = St->getNumElements();
8089     llvm::SmallVector<
8090         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8091         RecordLayout(NumElements);
8092 
8093     // Fill bases.
8094     for (const auto &I : RD->bases()) {
8095       if (I.isVirtual())
8096         continue;
8097       const auto *Base = I.getType()->getAsCXXRecordDecl();
8098       // Ignore empty bases.
8099       if (Base->isEmpty() || CGF.getContext()
8100                                  .getASTRecordLayout(Base)
8101                                  .getNonVirtualSize()
8102                                  .isZero())
8103         continue;
8104 
8105       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8106       RecordLayout[FieldIndex] = Base;
8107     }
8108     // Fill in virtual bases.
8109     for (const auto &I : RD->vbases()) {
8110       const auto *Base = I.getType()->getAsCXXRecordDecl();
8111       // Ignore empty bases.
8112       if (Base->isEmpty())
8113         continue;
8114       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8115       if (RecordLayout[FieldIndex])
8116         continue;
8117       RecordLayout[FieldIndex] = Base;
8118     }
8119     // Fill in all the fields.
8120     assert(!RD->isUnion() && "Unexpected union.");
8121     for (const auto *Field : RD->fields()) {
8122       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8123       // will fill in later.)
8124       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8125         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8126         RecordLayout[FieldIndex] = Field;
8127       }
8128     }
8129     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8130              &Data : RecordLayout) {
8131       if (Data.isNull())
8132         continue;
8133       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8134         getPlainLayout(Base, Layout, /*AsBase=*/true);
8135       else
8136         Layout.push_back(Data.get<const FieldDecl *>());
8137     }
8138   }
8139 
8140 public:
8141   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8142       : CurDir(&Dir), CGF(CGF) {
8143     // Extract firstprivate clause information.
8144     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8145       for (const auto *D : C->varlists())
8146         FirstPrivateDecls.try_emplace(
8147             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8148     // Extract device pointer clause information.
8149     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8150       for (auto L : C->component_lists())
8151         DevPointersMap[L.first].push_back(L.second);
8152   }
8153 
  /// Constructor for the declare mapper directive. No clause pre-processing
  /// is needed here; the mapper's clauses are walked on demand by
  /// generateAllInfoForMapper().
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8157 
8158   /// Generate code for the combined entry if we have a partially mapped struct
8159   /// and take care of the mapping flags of the arguments corresponding to
8160   /// individual struct members.
8161   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
8162                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8163                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
8164                          const StructRangeInfoTy &PartialStruct) const {
8165     // Base is the base of the struct
8166     BasePointers.push_back(PartialStruct.Base.getPointer());
8167     // Pointer is the address of the lowest element
8168     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
8169     Pointers.push_back(LB);
8170     // Size is (addr of {highest+1} element) - (addr of lowest element)
8171     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
8172     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
8173     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8174     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8175     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
8176     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8177                                                   /*isSigned=*/false);
8178     Sizes.push_back(Size);
8179     // Map type is always TARGET_PARAM
8180     Types.push_back(OMP_MAP_TARGET_PARAM);
8181     // Remove TARGET_PARAM flag from the first element
8182     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8183 
8184     // All other current entries will be MEMBER_OF the combined entry
8185     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8186     // 0xFFFF in the MEMBER_OF field).
8187     OpenMPOffloadMappingFlags MemberOfFlag =
8188         getMemberOfFlag(BasePointers.size() - 1);
8189     for (auto &M : CurTypes)
8190       setCorrectMemberOfFlag(M, MemberOfFlag);
8191   }
8192 
  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, a pair of the relevant declaration and
  /// index where it occurs is appended to the device pointers info array.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map. A nullptr key
    // stands for component lists rooted at 'this'.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Keys on the canonical declaration (or nullptr).
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect component lists from 'map' clauses, and from 'to'/'from'
    // motion clauses (recorded as 'map to' / 'map from' respectively).
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD);
        } else {
          // Not a struct member: emit the entry right away. Base and pointer
          // are the loaded pointer value, with a zero-sized section.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays; this declaration's entries may still
      // need a combined struct entry before being appended to the output.
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                     CurBasePointers, CurPointers, CurSizes,
                                     CurTypes, PartialStruct,
                                     IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          // Tag the first entry this component list produced so the runtime
          // returns the corresponding device pointer for RelevantVD.
          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                             OMP_MAP_MEMBER_OF);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }
8371 
8372   /// Generate all the base pointers, section pointers, sizes and map types for
8373   /// the extracted map clauses of user-defined mapper.
8374   void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8375                                 MapValuesArrayTy &Pointers,
8376                                 MapValuesArrayTy &Sizes,
8377                                 MapFlagsArrayTy &Types) const {
8378     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8379            "Expect a declare mapper directive");
8380     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8381     // We have to process the component lists that relate with the same
8382     // declaration in a single chunk so that we can generate the map flags
8383     // correctly. Therefore, we organize all lists in a map.
8384     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8385 
8386     // Helper function to fill the information map for the different supported
8387     // clauses.
8388     auto &&InfoGen = [&Info](
8389         const ValueDecl *D,
8390         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8391         OpenMPMapClauseKind MapType,
8392         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8393         bool ReturnDevicePointer, bool IsImplicit) {
8394       const ValueDecl *VD =
8395           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8396       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8397                             IsImplicit);
8398     };
8399 
8400     for (const auto *C : CurMapperDir->clauselists()) {
8401       const auto *MC = cast<OMPMapClause>(C);
8402       for (const auto L : MC->component_lists()) {
8403         InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8404                 /*ReturnDevicePointer=*/false, MC->isImplicit());
8405       }
8406     }
8407 
8408     for (const auto &M : Info) {
8409       // We need to know when we generate information for the first component
8410       // associated with a capture, because the mapping flags depend on it.
8411       bool IsFirstComponentList = true;
8412 
8413       // Temporary versions of arrays
8414       MapBaseValuesArrayTy CurBasePointers;
8415       MapValuesArrayTy CurPointers;
8416       MapValuesArrayTy CurSizes;
8417       MapFlagsArrayTy CurTypes;
8418       StructRangeInfoTy PartialStruct;
8419 
8420       for (const MapInfo &L : M.second) {
8421         assert(!L.Components.empty() &&
8422                "Not expecting declaration with no component lists.");
8423         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8424                                      CurBasePointers, CurPointers, CurSizes,
8425                                      CurTypes, PartialStruct,
8426                                      IsFirstComponentList, L.IsImplicit);
8427         IsFirstComponentList = false;
8428       }
8429 
8430       // If there is an entry in PartialStruct it means we have a struct with
8431       // individual members mapped. Emit an extra combined entry.
8432       if (PartialStruct.Base.isValid())
8433         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8434                           PartialStruct);
8435 
8436       // We need to append the results of this capture to what we already have.
8437       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8438       Pointers.append(CurPointers.begin(), CurPointers.end());
8439       Sizes.append(CurSizes.begin(), CurSizes.end());
8440       Types.append(CurTypes.begin(), CurTypes.end());
8441     }
8442   }
8443 
  /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambda closure types are of interest here; bail out otherwise.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    // Map each captured variable to the closure field that stores it.
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Emit an entry for a captured 'this': base is the closure field, the
      // pointer is the captured 'this' value, size is that of a void*.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      // Remember the closure address so adjustMemberOfForLambdaCaptures can
      // later patch the MEMBER_OF index of this entry.
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      BasePointers.push_back(ThisLVal.getPointer(CGF));
      Pointers.push_back(ThisLValVal.getPointer(CGF));
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // NOTE: this VD shadows the function parameter; it is the variable
      // captured by this particular lambda capture.
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need an entry.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced storage using the full
        // size of the captured variable's (non-reference) type.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        BasePointers.push_back(VarLVal.getPointer(CGF));
        Pointers.push_back(VarLValVal.getPointer(CGF));
        Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer: map the loaded pointer value with a zero-sized
        // section.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        BasePointers.push_back(VarLVal.getPointer(CGF));
        Pointers.push_back(VarRVal.getScalarVal());
        Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      // This exact flag combination marks implicit lambda-capture entries;
      // adjustMemberOfForLambdaCaptures matches on it when fixing MEMBER_OF.
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }
8507 
8508   /// Set correct indices for lambdas captures.
8509   void adjustMemberOfForLambdaCaptures(
8510       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8511       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8512       MapFlagsArrayTy &Types) const {
8513     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8514       // Set correct member_of idx for all implicit lambda captures.
8515       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8516                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8517         continue;
8518       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8519       assert(BasePtr && "Unable to find base lambda address.");
8520       int TgtIdx = -1;
8521       for (unsigned J = I; J > 0; --J) {
8522         unsigned Idx = J - 1;
8523         if (Pointers[Idx] != BasePtr)
8524           continue;
8525         TgtIdx = Idx;
8526         break;
8527       }
8528       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8529       // All other current entries will be MEMBER_OF the combined entry
8530       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8531       // 0xFFFF in the MEMBER_OF field).
8532       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8533       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8534     }
8535   }
8536 
8537   /// Generate the base pointers, section pointers, sizes and map types
8538   /// associated to a given capture.
8539   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8540                               llvm::Value *Arg,
8541                               MapBaseValuesArrayTy &BasePointers,
8542                               MapValuesArrayTy &Pointers,
8543                               MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
8544                               StructRangeInfoTy &PartialStruct) const {
8545     assert(!Cap->capturesVariableArrayType() &&
8546            "Not expecting to generate map info for a variable array type!");
8547 
8548     // We need to know when we generating information for the first component
8549     const ValueDecl *VD = Cap->capturesThis()
8550                               ? nullptr
8551                               : Cap->getCapturedVar()->getCanonicalDecl();
8552 
8553     // If this declaration appears in a is_device_ptr clause we just have to
8554     // pass the pointer by value. If it is a reference to a declaration, we just
8555     // pass its value.
8556     if (DevPointersMap.count(VD)) {
8557       BasePointers.emplace_back(Arg, VD);
8558       Pointers.push_back(Arg);
8559       Sizes.push_back(
8560           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8561                                     CGF.Int64Ty, /*isSigned=*/true));
8562       Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
8563       return;
8564     }
8565 
8566     using MapData =
8567         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8568                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
8569     SmallVector<MapData, 4> DeclComponentLists;
8570     assert(CurDir.is<const OMPExecutableDirective *>() &&
8571            "Expect a executable directive");
8572     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8573     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8574       for (const auto L : C->decl_component_lists(VD)) {
8575         assert(L.first == VD &&
8576                "We got information for the wrong declaration??");
8577         assert(!L.second.empty() &&
8578                "Not expecting declaration with no component lists.");
8579         DeclComponentLists.emplace_back(L.second, C->getMapType(),
8580                                         C->getMapTypeModifiers(),
8581                                         C->isImplicit());
8582       }
8583     }
8584 
8585     // Find overlapping elements (including the offset from the base element).
8586     llvm::SmallDenseMap<
8587         const MapData *,
8588         llvm::SmallVector<
8589             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8590         4>
8591         OverlappedData;
8592     size_t Count = 0;
8593     for (const MapData &L : DeclComponentLists) {
8594       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8595       OpenMPMapClauseKind MapType;
8596       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8597       bool IsImplicit;
8598       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8599       ++Count;
8600       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8601         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8602         std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
8603         auto CI = Components.rbegin();
8604         auto CE = Components.rend();
8605         auto SI = Components1.rbegin();
8606         auto SE = Components1.rend();
8607         for (; CI != CE && SI != SE; ++CI, ++SI) {
8608           if (CI->getAssociatedExpression()->getStmtClass() !=
8609               SI->getAssociatedExpression()->getStmtClass())
8610             break;
8611           // Are we dealing with different variables/fields?
8612           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8613             break;
8614         }
8615         // Found overlapping if, at least for one component, reached the head of
8616         // the components list.
8617         if (CI == CE || SI == SE) {
8618           assert((CI != CE || SI != SE) &&
8619                  "Unexpected full match of the mapping components.");
8620           const MapData &BaseData = CI == CE ? L : L1;
8621           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8622               SI == SE ? Components : Components1;
8623           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8624           OverlappedElements.getSecond().push_back(SubData);
8625         }
8626       }
8627     }
8628     // Sort the overlapped elements for each item.
8629     llvm::SmallVector<const FieldDecl *, 4> Layout;
8630     if (!OverlappedData.empty()) {
8631       if (const auto *CRD =
8632               VD->getType().getCanonicalType()->getAsCXXRecordDecl())
8633         getPlainLayout(CRD, Layout, /*AsBase=*/false);
8634       else {
8635         const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
8636         Layout.append(RD->field_begin(), RD->field_end());
8637       }
8638     }
8639     for (auto &Pair : OverlappedData) {
8640       llvm::sort(
8641           Pair.getSecond(),
8642           [&Layout](
8643               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8644               OMPClauseMappableExprCommon::MappableExprComponentListRef
8645                   Second) {
8646             auto CI = First.rbegin();
8647             auto CE = First.rend();
8648             auto SI = Second.rbegin();
8649             auto SE = Second.rend();
8650             for (; CI != CE && SI != SE; ++CI, ++SI) {
8651               if (CI->getAssociatedExpression()->getStmtClass() !=
8652                   SI->getAssociatedExpression()->getStmtClass())
8653                 break;
8654               // Are we dealing with different variables/fields?
8655               if (CI->getAssociatedDeclaration() !=
8656                   SI->getAssociatedDeclaration())
8657                 break;
8658             }
8659 
8660             // Lists contain the same elements.
8661             if (CI == CE && SI == SE)
8662               return false;
8663 
8664             // List with less elements is less than list with more elements.
8665             if (CI == CE || SI == SE)
8666               return CI == CE;
8667 
8668             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8669             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8670             if (FD1->getParent() == FD2->getParent())
8671               return FD1->getFieldIndex() < FD2->getFieldIndex();
8672             const auto It =
8673                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8674                   return FD == FD1 || FD == FD2;
8675                 });
8676             return *It == FD1;
8677           });
8678     }
8679 
8680     // Associated with a capture, because the mapping flags depend on it.
8681     // Go through all of the elements with the overlapped elements.
8682     for (const auto &Pair : OverlappedData) {
8683       const MapData &L = *Pair.getFirst();
8684       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8685       OpenMPMapClauseKind MapType;
8686       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8687       bool IsImplicit;
8688       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8689       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8690           OverlappedComponents = Pair.getSecond();
8691       bool IsFirstComponentList = true;
8692       generateInfoForComponentList(MapType, MapModifiers, Components,
8693                                    BasePointers, Pointers, Sizes, Types,
8694                                    PartialStruct, IsFirstComponentList,
8695                                    IsImplicit, OverlappedComponents);
8696     }
8697     // Go through other elements without overlapped elements.
8698     bool IsFirstComponentList = OverlappedData.empty();
8699     for (const MapData &L : DeclComponentLists) {
8700       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8701       OpenMPMapClauseKind MapType;
8702       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8703       bool IsImplicit;
8704       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8705       auto It = OverlappedData.find(&L);
8706       if (It == OverlappedData.end())
8707         generateInfoForComponentList(MapType, MapModifiers, Components,
8708                                      BasePointers, Pointers, Sizes, Types,
8709                                      PartialStruct, IsFirstComponentList,
8710                                      IsImplicit);
8711       IsFirstComponentList = false;
8712     }
8713   }
8714 
8715   /// Generate the base pointers, section pointers, sizes and map types
8716   /// associated with the declare target link variables.
8717   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8718                                         MapValuesArrayTy &Pointers,
8719                                         MapValuesArrayTy &Sizes,
8720                                         MapFlagsArrayTy &Types) const {
8721     assert(CurDir.is<const OMPExecutableDirective *>() &&
8722            "Expect a executable directive");
8723     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8724     // Map other list items in the map clause which are not captured variables
8725     // but "declare target link" global variables.
8726     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8727       for (const auto L : C->component_lists()) {
8728         if (!L.first)
8729           continue;
8730         const auto *VD = dyn_cast<VarDecl>(L.first);
8731         if (!VD)
8732           continue;
8733         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8734             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8735         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8736             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8737           continue;
8738         StructRangeInfoTy PartialStruct;
8739         generateInfoForComponentList(
8740             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8741             Pointers, Sizes, Types, PartialStruct,
8742             /*IsFirstComponentList=*/true, C->isImplicit());
8743         assert(!PartialStruct.Base.isValid() &&
8744                "No partial structs for declare target link expected.");
8745       }
8746     }
8747   }
8748 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Appends exactly one entry to each of \a CurBasePointers, \a CurPointers,
  /// \a CurSizes and \a CurMapTypes. The entry is always flagged as a target
  /// parameter and, when the capture is implicit, also as an implicit map.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // Captured 'this': map the pointed-to object (size of the pointee),
      // both to and from the device.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // An entry in FirstPrivateDecls records whether the map is implicit;
      // a firstprivate clause can thus override the default implicitness.
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      // Captured by reference: the size is that of the referenced element.
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // A constant firstprivate variable is materialized as a registered
        // copy and used as the map's base/pointer instead of the original.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer captured by reference: load the pointer
          // value through the reference and use it as the section pointer.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
8832 };
8833 } // anonymous namespace
8834 
8835 /// Emit the arrays used to pass the captures and map information to the
8836 /// offloading runtime library. If there is no map or capture information,
8837 /// return nullptr by reference.
8838 static void
8839 emitOffloadingArrays(CodeGenFunction &CGF,
8840                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
8841                      MappableExprsHandler::MapValuesArrayTy &Pointers,
8842                      MappableExprsHandler::MapValuesArrayTy &Sizes,
8843                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
8844                      CGOpenMPRuntime::TargetDataInfo &Info) {
8845   CodeGenModule &CGM = CGF.CGM;
8846   ASTContext &Ctx = CGF.getContext();
8847 
8848   // Reset the array information.
8849   Info.clearArrayInfo();
8850   Info.NumberOfPtrs = BasePointers.size();
8851 
8852   if (Info.NumberOfPtrs) {
8853     // Detect if we have any capture size requiring runtime evaluation of the
8854     // size so that a constant array could be eventually used.
8855     bool hasRuntimeEvaluationCaptureSize = false;
8856     for (llvm::Value *S : Sizes)
8857       if (!isa<llvm::Constant>(S)) {
8858         hasRuntimeEvaluationCaptureSize = true;
8859         break;
8860       }
8861 
8862     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8863     QualType PointerArrayType = Ctx.getConstantArrayType(
8864         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
8865         /*IndexTypeQuals=*/0);
8866 
8867     Info.BasePointersArray =
8868         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8869     Info.PointersArray =
8870         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8871 
8872     // If we don't have any VLA types or other types that require runtime
8873     // evaluation, we can use a constant array for the map sizes, otherwise we
8874     // need to fill up the arrays as we do for the pointers.
8875     QualType Int64Ty =
8876         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8877     if (hasRuntimeEvaluationCaptureSize) {
8878       QualType SizeArrayType = Ctx.getConstantArrayType(
8879           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
8880           /*IndexTypeQuals=*/0);
8881       Info.SizesArray =
8882           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8883     } else {
8884       // We expect all the sizes to be constant, so we collect them to create
8885       // a constant array.
8886       SmallVector<llvm::Constant *, 16> ConstSizes;
8887       for (llvm::Value *S : Sizes)
8888         ConstSizes.push_back(cast<llvm::Constant>(S));
8889 
8890       auto *SizesArrayInit = llvm::ConstantArray::get(
8891           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8892       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8893       auto *SizesArrayGbl = new llvm::GlobalVariable(
8894           CGM.getModule(), SizesArrayInit->getType(),
8895           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8896           SizesArrayInit, Name);
8897       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8898       Info.SizesArray = SizesArrayGbl;
8899     }
8900 
8901     // The map types are always constant so we don't need to generate code to
8902     // fill arrays. Instead, we create an array constant.
8903     SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8904     llvm::copy(MapTypes, Mapping.begin());
8905     llvm::Constant *MapTypesArrayInit =
8906         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8907     std::string MaptypesName =
8908         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8909     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8910         CGM.getModule(), MapTypesArrayInit->getType(),
8911         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8912         MapTypesArrayInit, MaptypesName);
8913     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8914     Info.MapTypesArray = MapTypesArrayGbl;
8915 
8916     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8917       llvm::Value *BPVal = *BasePointers[I];
8918       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8919           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8920           Info.BasePointersArray, 0, I);
8921       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8922           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8923       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8924       CGF.Builder.CreateStore(BPVal, BPAddr);
8925 
8926       if (Info.requiresDevicePointerInfo())
8927         if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8928           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8929 
8930       llvm::Value *PVal = Pointers[I];
8931       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8932           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8933           Info.PointersArray, 0, I);
8934       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8935           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8936       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8937       CGF.Builder.CreateStore(PVal, PAddr);
8938 
8939       if (hasRuntimeEvaluationCaptureSize) {
8940         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8941             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8942             Info.SizesArray,
8943             /*Idx0=*/0,
8944             /*Idx1=*/I);
8945         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8946         CGF.Builder.CreateStore(
8947             CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8948             SAddr);
8949       }
8950     }
8951   }
8952 }
8953 
8954 /// Emit the arguments to be passed to the runtime library based on the
8955 /// arrays of pointers, sizes and map types.
8956 static void emitOffloadingArraysArgument(
8957     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8958     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8959     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8960   CodeGenModule &CGM = CGF.CGM;
8961   if (Info.NumberOfPtrs) {
8962     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8963         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8964         Info.BasePointersArray,
8965         /*Idx0=*/0, /*Idx1=*/0);
8966     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8967         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8968         Info.PointersArray,
8969         /*Idx0=*/0,
8970         /*Idx1=*/0);
8971     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8972         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8973         /*Idx0=*/0, /*Idx1=*/0);
8974     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8975         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8976         Info.MapTypesArray,
8977         /*Idx0=*/0,
8978         /*Idx1=*/0);
8979   } else {
8980     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8981     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8982     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8983     MapTypesArrayArg =
8984         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8985   }
8986 }
8987 
/// Check for inner distribute directive.
///
/// Returns the distribute directive nested inside \p D when \p D is a target
/// (or target teams) directive whose single child is (or leads to) a
/// distribute directive; returns nullptr otherwise.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // Look through a compound statement wrapping a single child statement.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may contain 'distribute' directly, or one level deeper
      // through a nested 'teams' region.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        // Descend into the 'teams' region and check its single child.
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      // 'target teams' may only contain 'distribute' directly.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These target forms never carry a nested 'distribute'.
      return nullptr;
    // The remaining kinds are either combined directives that already include
    // 'distribute' or are not target directives at all; callers must not pass
    // them. They are enumerated explicitly (rather than using 'default') so
    // that adding a new directive kind forces an update of this switch.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9090 
9091 /// Emit the user-defined mapper function. The code generation follows the
9092 /// pattern in the example below.
9093 /// \code
9094 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9095 ///                                           void *base, void *begin,
9096 ///                                           int64_t size, int64_t type) {
9097 ///   // Allocate space for an array section first.
9098 ///   if (size > 1 && !maptype.IsDelete)
9099 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9100 ///                                 size*sizeof(Ty), clearToFrom(type));
9101 ///   // Map members.
9102 ///   for (unsigned i = 0; i < size; i++) {
9103 ///     // For each component specified by this mapper:
9104 ///     for (auto c : all_components) {
9105 ///       if (c.hasMapper())
9106 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9107 ///                       c.arg_type);
9108 ///       else
9109 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9110 ///                                     c.arg_begin, c.arg_size, c.arg_type);
9111 ///     }
9112 ///   }
9113 ///   // Delete the array section.
9114 ///   if (size > 1 && maptype.IsDelete)
9115 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9116 ///                                 size*sizeof(Ty), clearToFrom(type));
9117 /// }
9118 /// \endcode
9119 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9120                                             CodeGenFunction *CGF) {
9121   if (UDMMap.count(D) > 0)
9122     return;
9123   ASTContext &C = CGM.getContext();
9124   QualType Ty = D->getType();
9125   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9126   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9127   auto *MapperVarDecl =
9128       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9129   SourceLocation Loc = D->getLocation();
9130   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9131 
9132   // Prepare mapper function arguments and attributes.
9133   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9134                               C.VoidPtrTy, ImplicitParamDecl::Other);
9135   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9136                             ImplicitParamDecl::Other);
9137   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9138                              C.VoidPtrTy, ImplicitParamDecl::Other);
9139   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9140                             ImplicitParamDecl::Other);
9141   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9142                             ImplicitParamDecl::Other);
9143   FunctionArgList Args;
9144   Args.push_back(&HandleArg);
9145   Args.push_back(&BaseArg);
9146   Args.push_back(&BeginArg);
9147   Args.push_back(&SizeArg);
9148   Args.push_back(&TypeArg);
9149   const CGFunctionInfo &FnInfo =
9150       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9151   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9152   SmallString<64> TyStr;
9153   llvm::raw_svector_ostream Out(TyStr);
9154   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9155   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9156   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9157                                     Name, &CGM.getModule());
9158   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9159   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9160   // Start the mapper function code generation.
9161   CodeGenFunction MapperCGF(CGM);
9162   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9163   // Compute the starting and end addreses of array elements.
9164   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9165       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9166       C.getPointerType(Int64Ty), Loc);
9167   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9168       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
9169       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
9170   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
9171   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9172       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9173       C.getPointerType(Int64Ty), Loc);
9174   // Prepare common arguments for array initiation and deletion.
9175   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9176       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9177       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9178   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9179       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9180       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9181   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9182       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9183       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9184 
9185   // Emit array initiation if this is an array section and \p MapType indicates
9186   // that memory allocation is required.
9187   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9188   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9189                              ElementSize, HeadBB, /*IsInit=*/true);
9190 
9191   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9192 
9193   // Emit the loop header block.
9194   MapperCGF.EmitBlock(HeadBB);
9195   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9196   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9197   // Evaluate whether the initial condition is satisfied.
9198   llvm::Value *IsEmpty =
9199       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9200   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9201   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9202 
9203   // Emit the loop body block.
9204   MapperCGF.EmitBlock(BodyBB);
9205   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9206       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9207   PtrPHI->addIncoming(PtrBegin, EntryBB);
9208   Address PtrCurrent =
9209       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9210                           .getAlignment()
9211                           .alignmentOfArrayElement(ElementSize));
9212   // Privatize the declared variable of mapper to be the current array element.
9213   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9214   Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
9215     return MapperCGF
9216         .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
9217         .getAddress(MapperCGF);
9218   });
9219   (void)Scope.Privatize();
9220 
9221   // Get map clause information. Fill up the arrays with all mapped variables.
9222   MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9223   MappableExprsHandler::MapValuesArrayTy Pointers;
9224   MappableExprsHandler::MapValuesArrayTy Sizes;
9225   MappableExprsHandler::MapFlagsArrayTy MapTypes;
9226   MappableExprsHandler MEHandler(*D, MapperCGF);
9227   MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
9228 
9229   // Call the runtime API __tgt_mapper_num_components to get the number of
9230   // pre-existing components.
9231   llvm::Value *OffloadingArgs[] = {Handle};
9232   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9233       createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
9234   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9235       PreviousSize,
9236       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9237 
9238   // Fill up the runtime mapper handle for all components.
9239   for (unsigned I = 0; I < BasePointers.size(); ++I) {
9240     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9241         *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9242     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9243         Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9244     llvm::Value *CurSizeArg = Sizes[I];
9245 
9246     // Extract the MEMBER_OF field from the map type.
9247     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
9248     MapperCGF.EmitBlock(MemberBB);
9249     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
9250     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
9251         OriMapType,
9252         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
9253     llvm::BasicBlock *MemberCombineBB =
9254         MapperCGF.createBasicBlock("omp.member.combine");
9255     llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
9256     llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
9257     MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
9258     // Add the number of pre-existing components to the MEMBER_OF field if it
9259     // is valid.
9260     MapperCGF.EmitBlock(MemberCombineBB);
9261     llvm::Value *CombinedMember =
9262         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9263     // Do nothing if it is not a member of previous components.
9264     MapperCGF.EmitBlock(TypeBB);
9265     llvm::PHINode *MemberMapType =
9266         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
9267     MemberMapType->addIncoming(OriMapType, MemberBB);
9268     MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
9269 
9270     // Combine the map type inherited from user-defined mapper with that
9271     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9272     // bits of the \a MapType, which is the input argument of the mapper
9273     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9274     // bits of MemberMapType.
9275     // [OpenMP 5.0], 1.2.6. map-type decay.
9276     //        | alloc |  to   | from  | tofrom | release | delete
9277     // ----------------------------------------------------------
9278     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9279     // to     | alloc |  to   | alloc |   to   | release | delete
9280     // from   | alloc | alloc | from  |  from  | release | delete
9281     // tofrom | alloc |  to   | from  | tofrom | release | delete
9282     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9283         MapType,
9284         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9285                                    MappableExprsHandler::OMP_MAP_FROM));
9286     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9287     llvm::BasicBlock *AllocElseBB =
9288         MapperCGF.createBasicBlock("omp.type.alloc.else");
9289     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9290     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9291     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9292     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9293     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9294     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9295     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9296     MapperCGF.EmitBlock(AllocBB);
9297     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9298         MemberMapType,
9299         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9300                                      MappableExprsHandler::OMP_MAP_FROM)));
9301     MapperCGF.Builder.CreateBr(EndBB);
9302     MapperCGF.EmitBlock(AllocElseBB);
9303     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9304         LeftToFrom,
9305         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9306     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9307     // In case of to, clear OMP_MAP_FROM.
9308     MapperCGF.EmitBlock(ToBB);
9309     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9310         MemberMapType,
9311         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9312     MapperCGF.Builder.CreateBr(EndBB);
9313     MapperCGF.EmitBlock(ToElseBB);
9314     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9315         LeftToFrom,
9316         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9317     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9318     // In case of from, clear OMP_MAP_TO.
9319     MapperCGF.EmitBlock(FromBB);
9320     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9321         MemberMapType,
9322         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9323     // In case of tofrom, do nothing.
9324     MapperCGF.EmitBlock(EndBB);
9325     llvm::PHINode *CurMapType =
9326         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9327     CurMapType->addIncoming(AllocMapType, AllocBB);
9328     CurMapType->addIncoming(ToMapType, ToBB);
9329     CurMapType->addIncoming(FromMapType, FromBB);
9330     CurMapType->addIncoming(MemberMapType, ToElseBB);
9331 
9332     // TODO: call the corresponding mapper function if a user-defined mapper is
9333     // associated with this map clause.
9334     // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9335     // data structure.
9336     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9337                                      CurSizeArg, CurMapType};
9338     MapperCGF.EmitRuntimeCall(
9339         createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
9340         OffloadingArgs);
9341   }
9342 
9343   // Update the pointer to point to the next element that needs to be mapped,
9344   // and check whether we have mapped all elements.
9345   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9346       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9347   PtrPHI->addIncoming(PtrNext, BodyBB);
9348   llvm::Value *IsDone =
9349       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9350   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9351   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9352 
9353   MapperCGF.EmitBlock(ExitBB);
9354   // Emit array deletion if this is an array section and \p MapType indicates
9355   // that deletion is required.
9356   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9357                              ElementSize, DoneBB, /*IsInit=*/false);
9358 
9359   // Emit the function exit block.
9360   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9361   MapperCGF.FinishFunction();
9362   UDMMap.try_emplace(D, Fn);
9363   if (CGF) {
9364     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9365     Decls.second.push_back(D);
9366   }
9367 }
9368 
9369 /// Emit the array initialization or deletion portion for user-defined mapper
9370 /// code generation. First, it evaluates whether an array section is mapped and
9371 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9372 /// true, and \a MapType indicates to not delete this array, array
9373 /// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
9375 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9376     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9377     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9378     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9379   StringRef Prefix = IsInit ? ".init" : ".del";
9380 
9381   // Evaluate if this is an array section.
9382   llvm::BasicBlock *IsDeleteBB =
9383       MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
9384   llvm::BasicBlock *BodyBB =
9385       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9386   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9387       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9388   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9389 
9390   // Evaluate if we are going to delete this section.
9391   MapperCGF.EmitBlock(IsDeleteBB);
9392   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9393       MapType,
9394       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9395   llvm::Value *DeleteCond;
9396   if (IsInit) {
9397     DeleteCond = MapperCGF.Builder.CreateIsNull(
9398         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9399   } else {
9400     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9401         DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9402   }
9403   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9404 
9405   MapperCGF.EmitBlock(BodyBB);
9406   // Get the array size by multiplying element size and element number (i.e., \p
9407   // Size).
9408   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9409       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9410   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9411   // memory allocation/deletion purpose only.
9412   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9413       MapType,
9414       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9415                                    MappableExprsHandler::OMP_MAP_FROM)));
9416   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9417   // data structure.
9418   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9419   MapperCGF.EmitRuntimeCall(
9420       createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
9421 }
9422 
9423 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9424     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9425     llvm::Value *DeviceID,
9426     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9427                                      const OMPLoopDirective &D)>
9428         SizeEmitter) {
9429   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9430   const OMPExecutableDirective *TD = &D;
9431   // Get nested teams distribute kind directive, if any.
9432   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9433     TD = getNestedDistributeDirective(CGM.getContext(), D);
9434   if (!TD)
9435     return;
9436   const auto *LD = cast<OMPLoopDirective>(TD);
9437   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9438                                                      PrePostActionTy &) {
9439     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9440       llvm::Value *Args[] = {DeviceID, NumIterations};
9441       CGF.EmitRuntimeCall(
9442           createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
9443     }
9444   };
9445   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9446 }
9447 
/// Emit code that launches the target region described by \p D on a device,
/// with a fallback to the host version when offloading fails or is not
/// configured.
///
/// \param OutlinedFn Host version of the outlined target region.
/// \param OutlinedFnID Runtime identifier of the target region; may be null,
///        in which case only the host version is executed.
/// \param IfCond Condition from an 'if' clause, or null; when null,
///        offloading is attempted unconditionally.
/// \param Device Expression from a 'device' clause, or null.
/// \param SizeEmitter Callback computing the loop trip count for loop-based
///        target directives (forwarded to emitTargetNumIterationsCall).
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    const Expr *Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' clause routes emission through the task-based codegen path
  // (EmitOMPTargetTaskBasedDirective below).
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Collect the values captured by the target region into CapturedVars.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // Both of these are populated by TargetThenGen below before ThenGen runs.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any; default to OMP_DEVICEID_UNDEF when no 'device'
    // clause is present.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // Re-capture the variables in the context of the outer task region
      // before invoking the host fallback.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      // Re-capture the variables in the context of the outer task region.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Builds the offloading arrays for all captures and map clauses, then
  // dispatches ThenGen (directly or wrapped in a task).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk captures and captured values in lockstep with the fields of the
    // captured record.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the arrays to ThenGen through the by-reference captures.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9725 
/// Recursively scan \p S for OpenMP target executable directives and emit a
/// device entry-point function for each one found. \p ParentName is the
/// mangled name of the enclosing host function, used (together with the
/// device/file/line triple) to identify target-region entries.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // Compute the unique (device, file, line) triple for this region from its
    // source location.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter for the exact directive kind.
    // The switch is exhaustive over OpenMPDirectiveKind so that adding a new
    // directive produces a -Wswitch warning here; non-target kinds are
    // unreachable because of the RequiresDeviceCodegen check above.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // For any other executable directive, recurse into the statement it
  // captures (if any); the directive's own codegen is handled elsewhere.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9870 
9871 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9872   // If emitting code for the host, we do not process FD here. Instead we do
9873   // the normal code generation.
9874   if (!CGM.getLangOpts().OpenMPIsDevice) {
9875     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9876       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9877           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9878       // Do not emit device_type(nohost) functions for the host.
9879       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9880         return true;
9881     }
9882     return false;
9883   }
9884 
9885   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9886   // Try to detect target regions in the function.
9887   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9888     StringRef Name = CGM.getMangledName(GD);
9889     scanForTargetRegionsFunctions(FD->getBody(), Name);
9890     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9891         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9892     // Do not emit device_type(nohost) functions for the host.
9893     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9894       return true;
9895   }
9896 
9897   // Do not to emit function if it is not marked as declare target.
9898   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9899          AlreadyEmittedTargetDecls.count(VD) == 0;
9900 }
9901 
9902 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9903   if (!CGM.getLangOpts().OpenMPIsDevice)
9904     return false;
9905 
9906   // Check if there are Ctors/Dtors in this declaration and look for target
9907   // regions in it. We use the complete variant to produce the kernel name
9908   // mangling.
9909   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9910   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9911     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9912       StringRef ParentName =
9913           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9914       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9915     }
9916     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9917       StringRef ParentName =
9918           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9919       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9920     }
9921   }
9922 
9923   // Do not to emit variable if it is not marked as declare target.
9924   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9925       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9926           cast<VarDecl>(GD.getDecl()));
9927   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9928       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9929        HasRequiresUnifiedSharedMemory)) {
9930     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9931     return true;
9932   }
9933   return false;
9934 }
9935 
9936 llvm::Constant *
9937 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9938                                                 const VarDecl *VD) {
9939   assert(VD->getType().isConstant(CGM.getContext()) &&
9940          "Expected constant variable.");
9941   StringRef VarName;
9942   llvm::Constant *Addr;
9943   llvm::GlobalValue::LinkageTypes Linkage;
9944   QualType Ty = VD->getType();
9945   SmallString<128> Buffer;
9946   {
9947     unsigned DeviceID;
9948     unsigned FileID;
9949     unsigned Line;
9950     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9951                              FileID, Line);
9952     llvm::raw_svector_ostream OS(Buffer);
9953     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9954        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9955     VarName = OS.str();
9956   }
9957   Linkage = llvm::GlobalValue::InternalLinkage;
9958   Addr =
9959       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9960                                   getDefaultFirstprivateAddressSpace());
9961   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9962   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9963   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9964   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9965       VarName, Addr, VarSize,
9966       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9967   return Addr;
9968 }
9969 
9970 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9971                                                    llvm::Constant *Addr) {
9972   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9973       !CGM.getLangOpts().OpenMPIsDevice)
9974     return;
9975   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9976       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9977   if (!Res) {
9978     if (CGM.getLangOpts().OpenMPIsDevice) {
9979       // Register non-target variables being emitted in device code (debug info
9980       // may cause this).
9981       StringRef VarName = CGM.getMangledName(VD);
9982       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9983     }
9984     return;
9985   }
9986   // Register declare target variables.
9987   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9988   StringRef VarName;
9989   CharUnits VarSize;
9990   llvm::GlobalValue::LinkageTypes Linkage;
9991 
9992   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9993       !HasRequiresUnifiedSharedMemory) {
9994     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9995     VarName = CGM.getMangledName(VD);
9996     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9997       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9998       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9999     } else {
10000       VarSize = CharUnits::Zero();
10001     }
10002     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10003     // Temp solution to prevent optimizations of the internal variables.
10004     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10005       std::string RefName = getName({VarName, "ref"});
10006       if (!CGM.GetGlobalValue(RefName)) {
10007         llvm::Constant *AddrRef =
10008             getOrCreateInternalVariable(Addr->getType(), RefName);
10009         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10010         GVAddrRef->setConstant(/*Val=*/true);
10011         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10012         GVAddrRef->setInitializer(Addr);
10013         CGM.addCompilerUsedGlobal(GVAddrRef);
10014       }
10015     }
10016   } else {
10017     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10018             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10019              HasRequiresUnifiedSharedMemory)) &&
10020            "Declare target attribute must link or to with unified memory.");
10021     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10022       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10023     else
10024       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10025 
10026     if (CGM.getLangOpts().OpenMPIsDevice) {
10027       VarName = Addr->getName();
10028       Addr = nullptr;
10029     } else {
10030       VarName = getAddrOfDeclareTargetVar(VD).getName();
10031       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10032     }
10033     VarSize = CGM.getPointerSize();
10034     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10035   }
10036 
10037   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10038       VarName, Addr, VarSize, Flags, Linkage);
10039 }
10040 
10041 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10042   if (isa<FunctionDecl>(GD.getDecl()) ||
10043       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10044     return emitTargetFunctions(GD);
10045 
10046   return emitTargetGlobalVariable(GD);
10047 }
10048 
10049 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10050   for (const VarDecl *VD : DeferredGlobalVariables) {
10051     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10052         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10053     if (!Res)
10054       continue;
10055     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10056         !HasRequiresUnifiedSharedMemory) {
10057       CGM.EmitGlobal(VD);
10058     } else {
10059       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10060               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10061                HasRequiresUnifiedSharedMemory)) &&
10062              "Expected link clause or to clause with unified memory.");
10063       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10064     }
10065   }
10066 }
10067 
10068 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10069     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10070   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10071          " Expected target-based directive.");
10072 }
10073 
10074 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10075   for (const OMPClause *Clause : D->clauselists()) {
10076     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10077       HasRequiresUnifiedSharedMemory = true;
10078     } else if (const auto *AC =
10079                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10080       switch (AC->getAtomicDefaultMemOrderKind()) {
10081       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10082         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10083         break;
10084       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10085         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10086         break;
10087       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10088         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10089         break;
10090       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10091         break;
10092       }
10093     }
10094   }
10095 }
10096 
// Returns the default atomic ordering for the module. This is updated by
// processRequiresDirective when a 'requires atomic_default_mem_order' clause
// is seen; otherwise it is whatever RequiresAtomicOrdering was initialized to
// (initialization is outside this chunk).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10100 
10101 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10102                                                        LangAS &AS) {
10103   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10104     return false;
10105   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10106   switch(A->getAllocatorType()) {
10107   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10108   // Not supported, fallback to the default mem space.
10109   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10110   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10111   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10112   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10113   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10114   case OMPAllocateDeclAttr::OMPConstMemAlloc:
10115   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10116     AS = LangAS::Default;
10117     return true;
10118   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10119     llvm_unreachable("Expected predefined allocator for the variables with the "
10120                      "static storage.");
10121   }
10122   return false;
10123 }
10124 
// Returns true if a 'requires unified_shared_memory' clause was seen by
// processRequiresDirective in this translation unit.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10128 
10129 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10130     CodeGenModule &CGM)
10131     : CGM(CGM) {
10132   if (CGM.getLangOpts().OpenMPIsDevice) {
10133     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10134     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10135   }
10136 }
10137 
10138 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10139   if (CGM.getLangOpts().OpenMPIsDevice)
10140     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10141 }
10142 
10143 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10144   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10145     return true;
10146 
10147   const auto *D = cast<FunctionDecl>(GD.getDecl());
10148   // Do not to emit function if it is marked as declare target as it was already
10149   // emitted.
10150   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10151     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10152       if (auto *F = dyn_cast_or_null<llvm::Function>(
10153               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10154         return !F->isDeclaration();
10155       return false;
10156     }
10157     return true;
10158   }
10159 
10160   return !AlreadyEmittedTargetDecls.insert(D).second;
10161 }
10162 
/// Creates the host-side registration function that reports the 'requires'
/// clause flags to the offload runtime via __tgt_register_requires. Returns
/// nullptr when no registration is needed (device compilation, simd-only
/// mode, no target triples, or no target entries in this TU).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    // Scoped so the CodeGenFunction is torn down before we return.
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Pass the accumulated flags to the runtime as a 64-bit value.
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
10203 
10204 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10205                                     const OMPExecutableDirective &D,
10206                                     SourceLocation Loc,
10207                                     llvm::Function *OutlinedFn,
10208                                     ArrayRef<llvm::Value *> CapturedVars) {
10209   if (!CGF.HaveInsertPoint())
10210     return;
10211 
10212   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10213   CodeGenFunction::RunCleanupsScope Scope(CGF);
10214 
10215   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10216   llvm::Value *Args[] = {
10217       RTLoc,
10218       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10219       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10220   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10221   RealArgs.append(std::begin(Args), std::end(Args));
10222   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10223 
10224   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
10225   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10226 }
10227 
10228 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10229                                          const Expr *NumTeams,
10230                                          const Expr *ThreadLimit,
10231                                          SourceLocation Loc) {
10232   if (!CGF.HaveInsertPoint())
10233     return;
10234 
10235   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10236 
10237   llvm::Value *NumTeamsVal =
10238       NumTeams
10239           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10240                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10241           : CGF.Builder.getInt32(0);
10242 
10243   llvm::Value *ThreadLimitVal =
10244       ThreadLimit
10245           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10246                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10247           : CGF.Builder.getInt32(0);
10248 
10249   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10250   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10251                                      ThreadLimitVal};
10252   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
10253                       PushNumTeamsArgs);
10254 }
10255 
/// Emits the runtime bracketing for an 'omp target data' region:
/// __tgt_target_data_begin before the body and __tgt_target_data_end after
/// it, guarded by the optional if-clause (\p IfCond) and parameterized by the
/// optional device-clause (\p Device). When device pointer privatization is
/// required, the body is emitted twice: privatized inside the 'then' branch
/// and unprivatized in the 'else' branch.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments. Info is filled here and
    // consumed again in EndThenGen below.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any; OMP_DEVICEID_UNDEF selects the default device.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any (must match the value used by BeginThenGen).
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10382 
/// Emits the runtime call for a standalone target data directive ('target
/// enter data', 'target exit data', or 'target update'), selecting the
/// (possibly nowait) __tgt_target_data_* entry point, honoring the optional
/// if-clause (\p IfCond) and device-clause (\p Device), and routing through
/// the task-based path when a depend clause is present.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo and MapTypesArray are filled by TargetThenGen below before
  // ThenGen runs; they are captured by reference for that reason.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any; OMP_DEVICEID_UNDEF selects the default device.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    // All remaining directive kinds can never reach this function (see the
    // assert above). They are listed explicitly instead of using 'default'
    // so that adding a new directive kind triggers a -Wswitch warning here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the arrays to the captured InputInfo/MapTypesArray so ThenGen
    // (invoked below) can reference them.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    // With a depend clause the call is wrapped in a task; otherwise it is
    // emitted inline.
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10542 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; defaults to Vector (varying).
    ParamKindTy Kind = Vector;
    /// Stride for linear parameters, or the clause's constant argument
    /// (presumably filled in by the declare simd handling later in the file).
    llvm::APSInt StrideOrArg;
    /// Alignment value — likely from an 'aligned' clause; confirm at use site.
    llvm::APSInt Alignment;
  };
} // namespace
10553 
10554 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10555                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10556   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10557   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10558   // of that clause. The VLEN value must be power of 2.
10559   // In other case the notion of the function`s "characteristic data type" (CDT)
10560   // is used to compute the vector length.
10561   // CDT is defined in the following order:
10562   //   a) For non-void function, the CDT is the return type.
10563   //   b) If the function has any non-uniform, non-linear parameters, then the
10564   //   CDT is the type of the first such parameter.
10565   //   c) If the CDT determined by a) or b) above is struct, union, or class
10566   //   type which is pass-by-value (except for the type that maps to the
10567   //   built-in complex data type), the characteristic data type is int.
10568   //   d) If none of the above three cases is applicable, the CDT is int.
10569   // The VLEN is then determined based on the CDT and the size of vector
10570   // register of that ISA for which current vector version is generated. The
10571   // VLEN is computed using the formula below:
10572   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10573   // where vector register size specified in section 3.2.1 Registers and the
10574   // Stack Frame of original AMD64 ABI document.
10575   QualType RetType = FD->getReturnType();
10576   if (RetType.isNull())
10577     return 0;
10578   ASTContext &C = FD->getASTContext();
10579   QualType CDT;
10580   if (!RetType.isNull() && !RetType->isVoidType()) {
10581     CDT = RetType;
10582   } else {
10583     unsigned Offset = 0;
10584     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10585       if (ParamAttrs[Offset].Kind == Vector)
10586         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10587       ++Offset;
10588     }
10589     if (CDT.isNull()) {
10590       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10591         if (ParamAttrs[I + Offset].Kind == Vector) {
10592           CDT = FD->getParamDecl(I)->getType();
10593           break;
10594         }
10595       }
10596     }
10597   }
10598   if (CDT.isNull())
10599     CDT = C.IntTy;
10600   CDT = CDT->getCanonicalTypeUnqualified();
10601   if (CDT->isRecordType() || CDT->isUnionType())
10602     CDT = C.IntTy;
10603   return C.getTypeSize(CDT);
10604 }
10605 
10606 static void
10607 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10608                            const llvm::APSInt &VLENVal,
10609                            ArrayRef<ParamAttrTy> ParamAttrs,
10610                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10611   struct ISADataTy {
10612     char ISA;
10613     unsigned VecRegSize;
10614   };
10615   ISADataTy ISAData[] = {
10616       {
10617           'b', 128
10618       }, // SSE
10619       {
10620           'c', 256
10621       }, // AVX
10622       {
10623           'd', 256
10624       }, // AVX2
10625       {
10626           'e', 512
10627       }, // AVX512
10628   };
10629   llvm::SmallVector<char, 2> Masked;
10630   switch (State) {
10631   case OMPDeclareSimdDeclAttr::BS_Undefined:
10632     Masked.push_back('N');
10633     Masked.push_back('M');
10634     break;
10635   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10636     Masked.push_back('N');
10637     break;
10638   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10639     Masked.push_back('M');
10640     break;
10641   }
10642   for (char Mask : Masked) {
10643     for (const ISADataTy &Data : ISAData) {
10644       SmallString<256> Buffer;
10645       llvm::raw_svector_ostream Out(Buffer);
10646       Out << "_ZGV" << Data.ISA << Mask;
10647       if (!VLENVal) {
10648         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10649         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10650         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10651       } else {
10652         Out << VLENVal;
10653       }
10654       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10655         switch (ParamAttr.Kind){
10656         case LinearWithVarStride:
10657           Out << 's' << ParamAttr.StrideOrArg;
10658           break;
10659         case Linear:
10660           Out << 'l';
10661           if (!!ParamAttr.StrideOrArg)
10662             Out << ParamAttr.StrideOrArg;
10663           break;
10664         case Uniform:
10665           Out << 'u';
10666           break;
10667         case Vector:
10668           Out << 'v';
10669           break;
10670         }
10671         if (!!ParamAttr.Alignment)
10672           Out << 'a' << ParamAttr.Alignment;
10673       }
10674       Out << '_' << Fn->getName();
10675       Fn->addFnAttr(Out.str());
10676     }
10677   }
10678 }
10679 
// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10685 
10686 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10687 ///
10688 /// TODO: Need to implement the behavior for reference marked with a
10689 /// var or no linear modifiers (1.b in the section). For this, we
10690 /// need to extend ParamKindTy to support the linear modifiers.
10691 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10692   QT = QT.getCanonicalType();
10693 
10694   if (QT->isVoidType())
10695     return false;
10696 
10697   if (Kind == ParamKindTy::Uniform)
10698     return false;
10699 
10700   if (Kind == ParamKindTy::Linear)
10701     return false;
10702 
10703   // TODO: Handle linear references with modifiers
10704 
10705   if (Kind == ParamKindTy::LinearWithVarStride)
10706     return false;
10707 
10708   return true;
10709 }
10710 
10711 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10712 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10713   QT = QT.getCanonicalType();
10714   unsigned Size = C.getTypeSize(QT);
10715 
10716   // Only scalars and complex within 16 bytes wide set PVB to true.
10717   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10718     return false;
10719 
10720   if (QT->isFloatingType())
10721     return true;
10722 
10723   if (QT->isIntegerType())
10724     return true;
10725 
10726   if (QT->isPointerType())
10727     return true;
10728 
10729   // TODO: Add support for complex types (section 3.1.2, item 2).
10730 
10731   return false;
10732 }
10733 
10734 /// Computes the lane size (LS) of a return type or of an input parameter,
10735 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10736 /// TODO: Add support for references, section 3.2.1, item 1.
10737 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10738   if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10739     QualType PTy = QT.getCanonicalType()->getPointeeType();
10740     if (getAArch64PBV(PTy, C))
10741       return C.getTypeSize(PTy);
10742   }
10743   if (getAArch64PBV(QT, C))
10744     return C.getTypeSize(QT);
10745 
10746   return C.getTypeSize(C.getUIntPtrType());
10747 }
10748 
10749 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10750 // signature of the scalar function, as defined in 3.2.2 of the
10751 // AAVFABI.
10752 static std::tuple<unsigned, unsigned, bool>
10753 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10754   QualType RetType = FD->getReturnType().getCanonicalType();
10755 
10756   ASTContext &C = FD->getASTContext();
10757 
10758   bool OutputBecomesInput = false;
10759 
10760   llvm::SmallVector<unsigned, 8> Sizes;
10761   if (!RetType->isVoidType()) {
10762     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10763     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10764       OutputBecomesInput = true;
10765   }
10766   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10767     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10768     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10769   }
10770 
10771   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10772   // The LS of a function parameter / return value can only be a power
10773   // of 2, starting from 8 bits, up to 128.
10774   assert(std::all_of(Sizes.begin(), Sizes.end(),
10775                      [](unsigned Size) {
10776                        return Size == 8 || Size == 16 || Size == 32 ||
10777                               Size == 64 || Size == 128;
10778                      }) &&
10779          "Invalid size");
10780 
10781   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10782                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10783                          OutputBecomesInput);
10784 }
10785 
10786 /// Mangle the parameter part of the vector function name according to
10787 /// their OpenMP classification. The mangling function is defined in
10788 /// section 3.5 of the AAVFABI.
10789 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10790   SmallString<256> Buffer;
10791   llvm::raw_svector_ostream Out(Buffer);
10792   for (const auto &ParamAttr : ParamAttrs) {
10793     switch (ParamAttr.Kind) {
10794     case LinearWithVarStride:
10795       Out << "ls" << ParamAttr.StrideOrArg;
10796       break;
10797     case Linear:
10798       Out << 'l';
10799       // Don't print the step value if it is not present or if it is
10800       // equal to 1.
10801       if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
10802         Out << ParamAttr.StrideOrArg;
10803       break;
10804     case Uniform:
10805       Out << 'u';
10806       break;
10807     case Vector:
10808       Out << 'v';
10809       break;
10810     }
10811 
10812     if (!!ParamAttr.Alignment)
10813       Out << 'a' << ParamAttr.Alignment;
10814   }
10815 
10816   return std::string(Out.str());
10817 }
10818 
10819 // Function used to add the attribute. The parameter `VLEN` is
10820 // templated to allow the use of "x" when targeting scalable functions
10821 // for SVE.
10822 template <typename T>
10823 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10824                                  char ISA, StringRef ParSeq,
10825                                  StringRef MangledName, bool OutputBecomesInput,
10826                                  llvm::Function *Fn) {
10827   SmallString<256> Buffer;
10828   llvm::raw_svector_ostream Out(Buffer);
10829   Out << Prefix << ISA << LMask << VLEN;
10830   if (OutputBecomesInput)
10831     Out << "v";
10832   Out << ParSeq << "_" << MangledName;
10833   Fn->addFnAttr(Out.str());
10834 }
10835 
10836 // Helper function to generate the Advanced SIMD names depending on
10837 // the value of the NDS when simdlen is not present.
10838 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10839                                       StringRef Prefix, char ISA,
10840                                       StringRef ParSeq, StringRef MangledName,
10841                                       bool OutputBecomesInput,
10842                                       llvm::Function *Fn) {
10843   switch (NDS) {
10844   case 8:
10845     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10846                          OutputBecomesInput, Fn);
10847     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10848                          OutputBecomesInput, Fn);
10849     break;
10850   case 16:
10851     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10852                          OutputBecomesInput, Fn);
10853     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10854                          OutputBecomesInput, Fn);
10855     break;
10856   case 32:
10857     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10858                          OutputBecomesInput, Fn);
10859     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10860                          OutputBecomesInput, Fn);
10861     break;
10862   case 64:
10863   case 128:
10864     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10865                          OutputBecomesInput, Fn);
10866     break;
10867   default:
10868     llvm_unreachable("Scalar type is too wide.");
10869   }
10870 }
10871 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// Mangled variant names are attached to \p Fn as string attributes. When the
/// user-provided 'simdlen' violates one of the ABI constraints checked below,
/// a warning is emitted and no attribute is added.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits: total bit width in [128, 2048], in multiples of 128.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: use the scalable token "x" as VLEN.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
10980 
10981 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10982                                               llvm::Function *Fn) {
10983   ASTContext &C = CGM.getContext();
10984   FD = FD->getMostRecentDecl();
10985   // Map params to their positions in function decl.
10986   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10987   if (isa<CXXMethodDecl>(FD))
10988     ParamPositions.try_emplace(FD, 0);
10989   unsigned ParamPos = ParamPositions.size();
10990   for (const ParmVarDecl *P : FD->parameters()) {
10991     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10992     ++ParamPos;
10993   }
10994   while (FD) {
10995     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10996       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10997       // Mark uniform parameters.
10998       for (const Expr *E : Attr->uniforms()) {
10999         E = E->IgnoreParenImpCasts();
11000         unsigned Pos;
11001         if (isa<CXXThisExpr>(E)) {
11002           Pos = ParamPositions[FD];
11003         } else {
11004           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11005                                 ->getCanonicalDecl();
11006           Pos = ParamPositions[PVD];
11007         }
11008         ParamAttrs[Pos].Kind = Uniform;
11009       }
11010       // Get alignment info.
11011       auto NI = Attr->alignments_begin();
11012       for (const Expr *E : Attr->aligneds()) {
11013         E = E->IgnoreParenImpCasts();
11014         unsigned Pos;
11015         QualType ParmTy;
11016         if (isa<CXXThisExpr>(E)) {
11017           Pos = ParamPositions[FD];
11018           ParmTy = E->getType();
11019         } else {
11020           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11021                                 ->getCanonicalDecl();
11022           Pos = ParamPositions[PVD];
11023           ParmTy = PVD->getType();
11024         }
11025         ParamAttrs[Pos].Alignment =
11026             (*NI)
11027                 ? (*NI)->EvaluateKnownConstInt(C)
11028                 : llvm::APSInt::getUnsigned(
11029                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11030                           .getQuantity());
11031         ++NI;
11032       }
11033       // Mark linear parameters.
11034       auto SI = Attr->steps_begin();
11035       auto MI = Attr->modifiers_begin();
11036       for (const Expr *E : Attr->linears()) {
11037         E = E->IgnoreParenImpCasts();
11038         unsigned Pos;
11039         if (isa<CXXThisExpr>(E)) {
11040           Pos = ParamPositions[FD];
11041         } else {
11042           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11043                                 ->getCanonicalDecl();
11044           Pos = ParamPositions[PVD];
11045         }
11046         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11047         ParamAttr.Kind = Linear;
11048         if (*SI) {
11049           Expr::EvalResult Result;
11050           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11051             if (const auto *DRE =
11052                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11053               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11054                 ParamAttr.Kind = LinearWithVarStride;
11055                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11056                     ParamPositions[StridePVD->getCanonicalDecl()]);
11057               }
11058             }
11059           } else {
11060             ParamAttr.StrideOrArg = Result.Val.getInt();
11061           }
11062         }
11063         ++SI;
11064         ++MI;
11065       }
11066       llvm::APSInt VLENVal;
11067       SourceLocation ExprLoc;
11068       const Expr *VLENExpr = Attr->getSimdlen();
11069       if (VLENExpr) {
11070         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11071         ExprLoc = VLENExpr->getExprLoc();
11072       }
11073       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11074       if (CGM.getTriple().isX86()) {
11075         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11076       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11077         unsigned VLEN = VLENVal.getExtValue();
11078         StringRef MangledName = Fn->getName();
11079         if (CGM.getTarget().hasFeature("sve"))
11080           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11081                                          MangledName, 's', 128, Fn, ExprLoc);
11082         if (CGM.getTarget().hasFeature("neon"))
11083           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11084                                          MangledName, 'n', 128, Fn, ExprLoc);
11085       }
11086     }
11087     FD = FD->getPreviousDecl();
11088   }
11089 }
11090 
11091 namespace {
11092 /// Cleanup action for doacross support.
11093 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11094 public:
11095   static const int DoacrossFinArgs = 2;
11096 
11097 private:
11098   llvm::FunctionCallee RTLFn;
11099   llvm::Value *Args[DoacrossFinArgs];
11100 
11101 public:
11102   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11103                     ArrayRef<llvm::Value *> CallArgs)
11104       : RTLFn(RTLFn) {
11105     assert(CallArgs.size() == DoacrossFinArgs);
11106     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11107   }
11108   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11109     if (!CGF.HaveInsertPoint())
11110       return;
11111     CGF.EmitRuntimeCall(RTLFn, Args);
11112   }
11113 };
11114 } // namespace
11115 
/// Emits the doacross-loop initialization: builds a local array of kmp_dim
/// descriptors (one per loop dimension in \p NumIterations), calls
/// __kmpc_doacross_init, and registers a cleanup that calls
/// __kmpc_doacross_fini on scope exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Lazily build and cache the kmp_dim record type on first use.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim element per loop dimension.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize 'dims'; the 'lo' field therefore stays 0 and only
  // 'up' and 'st' are stored explicitly below.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal =
        CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
                                 D.getNumIterations()->getType(), Int64Ty,
                                 D.getNumIterations()->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule __kmpc_doacross_fini(loc, gtid) for both the normal and the
  // exceptional exit path of the enclosing scope.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
11187 
/// Emits a doacross 'ordered depend' runtime call: __kmpc_doacross_post for
/// 'depend(source)' and __kmpc_doacross_wait for 'depend(sink)', passing the
/// loop-counter vector converted to kmp_int64.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  // Temporary array with one kmp_int64 counter per associated loop.
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    // Convert each counter to the 64-bit type the runtime expects.
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
11218 
11219 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11220                                llvm::FunctionCallee Callee,
11221                                ArrayRef<llvm::Value *> Args) const {
11222   assert(Loc.isValid() && "Outlined function call location must be valid.");
11223   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11224 
11225   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11226     if (Fn->doesNotThrow()) {
11227       CGF.EmitNounwindRuntimeCall(Fn, Args);
11228       return;
11229     }
11230   }
11231   CGF.EmitRuntimeCall(Callee, Args);
11232 }
11233 
/// Emits a call to an outlined OpenMP function by forwarding to emitCall.
/// NOTE(review): presumably a customization point overridden by derived
/// (device) runtimes — verify against subclasses.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11239 
11240 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11241   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11242     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11243       HasEmittedDeclareTargetRegion = true;
11244 }
11245 
/// Returns the address of \p NativeParam in the current function.
/// TargetParam is ignored here; presumably device-specific runtimes override
/// this to translate between native and target parameters — verify against
/// derived classes.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11251 
namespace {
/// Cleanup action for allocate support: re-emits the captured runtime call
/// (the __kmpc_free registered by getAddressOfLocalVariable) on scope exit.
class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
public:
  /// Number of arguments of the cleanup call: thread id, address, allocator.
  static const int CleanupArgs = 3;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[CleanupArgs];

public:
  OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                       ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == CleanupArgs &&
           "Size of arguments does not match.");
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // The insert point may be gone (e.g. after a terminator); skip emission.
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace
11277 
/// For a variable with an 'omp allocate' attribute, allocates its storage
/// through __kmpc_alloc (using the clause's allocator) and registers a
/// __kmpc_free cleanup for scope exit. Returns Address::invalid() when the
/// default allocation should be used instead.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // VLA: the size is only known at runtime; round it up to the alignment.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Constant-size type: align the static size at compile time.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
                          getName({CVD->getName(), ".void.addr"}));
  // Pair the allocation with a __kmpc_free call on both normal and
  // exceptional scope exit.
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw void* result to a pointer to the variable's type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getName({CVD->getName(), ".addr"}));
  return Address(Addr, Align);
}
11331 
11332 /// Finds the variant function that matches current context with its context
11333 /// selector.
11334 static const FunctionDecl *getDeclareVariantFunction(CodeGenModule &CGM,
11335                                                      const FunctionDecl *FD) {
11336   if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>())
11337     return FD;
11338 
11339   SmallVector<Expr *, 8> VariantExprs;
11340   SmallVector<VariantMatchInfo, 8> VMIs;
11341   for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) {
11342     const OMPTraitInfo &TI = A->getTraitInfos();
11343     VMIs.push_back(VariantMatchInfo());
11344     TI.getAsVariantMatchInfo(CGM.getContext(), VMIs.back());
11345     VariantExprs.push_back(A->getVariantFuncRef());
11346   }
11347 
11348   OMPContext Ctx(CGM.getLangOpts().OpenMPIsDevice, CGM.getTriple());
11349   // FIXME: Keep the context in the OMPIRBuilder so we can add constructs as we
11350   //        build them.
11351 
11352   int BestMatchIdx = getBestVariantMatchForContext(VMIs, Ctx);
11353   if (BestMatchIdx < 0)
11354     return FD;
11355 
11356   return cast<FunctionDecl>(
11357       cast<DeclRefExpr>(VariantExprs[BestMatchIdx]->IgnoreParenImpCasts())
11358           ->getDecl());
11359 }
11360 
11361 bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
11362   const auto *D = cast<FunctionDecl>(GD.getDecl());
11363   // If the original function is defined already, use its definition.
11364   StringRef MangledName = CGM.getMangledName(GD);
11365   llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName);
11366   if (Orig && !Orig->isDeclaration())
11367     return false;
11368   const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D);
11369   // Emit original function if it does not have declare variant attribute or the
11370   // context does not match.
11371   if (NewFD == D)
11372     return false;
11373   GlobalDecl NewGD = GD.getWithDecl(NewFD);
11374   if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) {
11375     DeferredVariantFunction.erase(D);
11376     return true;
11377   }
11378   DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD)));
11379   return true;
11380 }
11381 
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  // Nothing to do unless the loop directive carries 'nontemporal' clauses.
  if (!NeedToPush)
    return;
  // Push a fresh set of nontemporal declarations for this directive; the
  // destructor pops it again.
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        // Otherwise the reference must be a member of the current class,
        // accessed through (implicit) 'this'.
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}
11407 
11408 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11409   if (!NeedToPush)
11410     return;
11411   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11412 }
11413 
11414 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11415   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11416 
11417   return llvm::any_of(
11418       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11419       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11420 }
11421 
11422 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11423     const OMPExecutableDirective &S,
11424     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11425     const {
11426   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11427   // Vars in target/task regions must be excluded completely.
11428   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11429       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11430     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11431     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11432     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11433     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11434       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11435         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11436     }
11437   }
11438   // Exclude vars in private clauses.
11439   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11440     for (const Expr *Ref : C->varlists()) {
11441       if (!Ref->getType()->isScalarType())
11442         continue;
11443       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11444       if (!DRE)
11445         continue;
11446       NeedToCheckForLPCs.insert(DRE->getDecl());
11447     }
11448   }
11449   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11450     for (const Expr *Ref : C->varlists()) {
11451       if (!Ref->getType()->isScalarType())
11452         continue;
11453       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11454       if (!DRE)
11455         continue;
11456       NeedToCheckForLPCs.insert(DRE->getDecl());
11457     }
11458   }
11459   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11460     for (const Expr *Ref : C->varlists()) {
11461       if (!Ref->getType()->isScalarType())
11462         continue;
11463       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11464       if (!DRE)
11465         continue;
11466       NeedToCheckForLPCs.insert(DRE->getDecl());
11467     }
11468   }
11469   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11470     for (const Expr *Ref : C->varlists()) {
11471       if (!Ref->getType()->isScalarType())
11472         continue;
11473       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11474       if (!DRE)
11475         continue;
11476       NeedToCheckForLPCs.insert(DRE->getDecl());
11477     }
11478   }
11479   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11480     for (const Expr *Ref : C->varlists()) {
11481       if (!Ref->getType()->isScalarType())
11482         continue;
11483       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11484       if (!DRE)
11485         continue;
11486       NeedToCheckForLPCs.insert(DRE->getDecl());
11487     }
11488   }
11489   for (const Decl *VD : NeedToCheckForLPCs) {
11490     for (const LastprivateConditionalData &Data :
11491          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11492       if (Data.DeclToUniqueName.count(VD) > 0) {
11493         if (!Data.Disabled)
11494           NeedToAddForLPCsAsDisabled.insert(VD);
11495         break;
11496       }
11497     }
11498   }
11499 }
11500 
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      // Push only for OpenMP >= 5.0 and only when at least one lastprivate
      // clause has the 'conditional' modifier.
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  // Register every conditional lastprivate variable together with the unique
  // name under which its global "last value" storage is created.
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  // Remember the loop IV and the owning function so that references from
  // inner (outlined) regions can be detected and handled specially.
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
11532 
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  // "Disable" form: collect the decls of S that an enclosing lastprivate
  // conditional region tracks and push them as a disabled entry so inner
  // analysis does not emit spurious updates for them.
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    // The entry carries empty unique names and Disabled=true; the reference
    // checker treats a hit on such an entry as "no match".
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
11551 
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  // Named factory delegating to the two-argument "disable" constructor.
  return LastprivateConditionalRAII(CGF, S);
}
11557 
CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  // Constructors only push for OpenMP >= 5.0; mirror that guard here.
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  // Exactly one of the two push actions may have been recorded; pop the
  // matching stack entry and sanity-check its Disabled flag.
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}
11573 
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  // Emits private storage for a lastprivate conditional variable as an
  // implicit record { value, Fired } so inner regions can flag (via Fired)
  // that the value was written. Zero-initializes Fired and returns the
  // address of the value field.
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First time VD is seen in this function: build the record type,
    // allocate a temporary of it, and cache everything.
    // NOTE(review): "lasprivate" looks like a typo for "lastprivate", but the
    // name appears in emitted IR type names (and hence in FileCheck-based
    // tests), so it is deliberately left unchanged here.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Reuse the cached record type, fields and base lvalue.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // priv_a.Fired = 0;
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
11608 
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  /// Stack of active lastprivate conditional regions, innermost last.
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  // Match results, filled by the Visit* methods below.
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  /// Matches a plain variable reference against the tracked declarations,
  /// innermost region first. A hit in a Disabled entry suppresses the match.
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  /// Matches a member of the current class ('this->x') the same way.
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  /// Recurses into glvalue children only - a prvalue subexpression cannot be
  /// the lastprivate conditional variable itself.
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  /// Returns (expr, decl, unique name, IV lvalue, owning function) for the
  /// match; only meaningful after Visit() returned true.
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
11679 
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Emits the conditional update for a lastprivate conditional variable:
  // compare the current iteration counter against the globally stored one
  // and, when not older, copy both the counter and the private value into
  // internal globals keyed by UniqueDeclName (shared by all references to
  // the same variable).
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var. Signedness of the comparison follows the IV type.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    // Compare-and-store must be mutually exclusive across threads.
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
11766 
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  // Called on the LHS of assignments: if LHS refers to a lastprivate
  // conditional variable, emit the tracking/update code for it.
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    // NOTE(review): operator[] on LastprivateConditionalToTypes would create
    // an empty entry for an unknown FoundFn; the assert below then catches
    // the inconsistency in +Asserts builds.
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    // Reinterpret the private copy's address as the { value, Fired } record
    // built by emitLastprivateConditionalInit, then set Fired.
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // Atomic store: the flag may be set concurrently by multiple threads.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
11809 
11810 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11811     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11812     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11813   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11814     return;
11815   auto Range = llvm::reverse(LastprivateConditionalStack);
11816   auto It = llvm::find_if(
11817       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11818   if (It == Range.end() || It->Fn != CGF.CurFn)
11819     return;
11820   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11821   assert(LPCI != LastprivateConditionalToTypes.end() &&
11822          "Lastprivates must be registered already.");
11823   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11824   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11825   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11826   for (const auto &Pair : It->DeclToUniqueName) {
11827     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11828     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
11829       continue;
11830     auto I = LPCI->getSecond().find(Pair.first);
11831     assert(I != LPCI->getSecond().end() &&
11832            "Lastprivate must be rehistered already.");
11833     // bool Cmp = priv_a.Fired != 0;
11834     LValue BaseLVal = std::get<3>(I->getSecond());
11835     LValue FiredLVal =
11836         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11837     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11838     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11839     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11840     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11841     // if (Cmp) {
11842     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11843     CGF.EmitBlock(ThenBB);
11844     Address Addr = CGF.GetAddrOfLocalVar(VD);
11845     LValue LVal;
11846     if (VD->getType()->isReferenceType())
11847       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11848                                            AlignmentSource::Decl);
11849     else
11850       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11851                                 AlignmentSource::Decl);
11852     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11853                                      D.getBeginLoc());
11854     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11855     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11856     // }
11857   }
11858 }
11859 
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  // Copies the globally tracked "last" value of a lastprivate conditional
  // variable back into its private copy at the end of the region.
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  // NOTE(review): assumes LastprivateConditionalStack is non-empty here -
  // callers must only invoke this inside an active lastprivate conditional
  // region; confirm against call sites.
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
11878 
/// Outlining 'parallel' regions is unsupported in SIMD-only mode; reaching
/// this is a caller bug.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11884 
/// Outlining 'teams' regions is unsupported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11890 
/// Outlining task functions is unsupported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11898 
/// 'parallel' calls are unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11906 
/// 'critical' regions are unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11913 
/// 'master' regions are unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11919 
/// 'taskyield' is unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11924 
/// 'taskgroup' regions are unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11930 
/// 'single' regions are unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11938 
/// 'ordered' regions are unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11945 
/// Barriers are unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11953 
/// Dynamic loop dispatch is unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11960 
/// Static loop scheduling is unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11966 
/// 'distribute' scheduling is unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11972 
/// Ordered-iteration bookkeeping is unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11979 
/// Static loop finalization is unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11985 
/// Dynamic-schedule chunk fetching is unsupported in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11993 
/// 'num_threads' has no meaning in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11999 
/// 'proc_bind' has no meaning in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12005 
/// Stub: 'threadprivate' storage is managed by the OpenMP runtime;
/// unsupported in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12012 
/// Stub: defining a 'threadprivate' variable requires runtime registration;
/// unsupported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12018 
/// Stub: artificial threadprivate storage is runtime-managed; unsupported in
/// SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12023 
/// Stub: 'flush' lowers to a runtime/fence call between threads; unsupported
/// in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12030 
/// Stub: task creation/dispatch is pure runtime functionality; unsupported in
/// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12039 
/// Stub: 'taskloop' dispatch is pure runtime functionality; unsupported in
/// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12046 
/// Emit reduction finalization for a SIMD construct. Only the "simple"
/// (runtime-free, single-thread) reduction form can occur here — the assert
/// guards that — so the work is delegated to the base-class implementation,
/// which handles the SimpleReduction path without runtime calls.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12055 
/// Stub: task reductions require runtime bookkeeping; unsupported in
/// SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12061 
/// Stub: task-reduction fixups require runtime bookkeeping; unsupported in
/// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12068 
/// Stub: per-task reduction storage is runtime-managed; unsupported in
/// SIMD-only mode.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12075 
/// Stub: 'taskwait' is a runtime call; unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12080 
/// Stub: 'cancellation point' queries the runtime; unsupported in SIMD-only
/// mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12086 
/// Stub: 'cancel' notifies the runtime; unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12092 
/// Stub: outlining offload regions requires device/runtime support;
/// unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12099 
/// Stub: launching a 'target' region is a runtime call; unsupported in
/// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    const Expr *Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12109 
/// Stub: device-side function registration does not apply in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12113 
/// Stub: device-side global-variable registration does not apply in SIMD-only
/// mode.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12117 
/// Always returns false: this runtime claims no special handling for the
/// global, so the caller proceeds with its normal (host) emission path.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
12121 
/// Stub: 'teams' forking is a runtime call; unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12129 
/// Stub: 'num_teams'/'thread_limit' configure the runtime; unsupported in
/// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12136 
/// Stub: 'target data' mapping requires the offload runtime; unsupported in
/// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12142 
/// Stub: standalone 'target enter/exit data'/'target update' require the
/// offload runtime; unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12148 
/// Stub: doacross ('ordered' with 'depend') initialization is a runtime call;
/// unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12154 
/// Stub: doacross wait/post is a runtime call; unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12159 
/// Stub: parameter translation exists for device codegen conventions;
/// unsupported in SIMD-only mode.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12165 
/// Stub: mapping native to target parameters exists for device codegen
/// conventions; unsupported in SIMD-only mode.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12172