1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/CodeGen/ConstantInitBuilder.h"
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/SetOperations.h"
27 #include "llvm/Bitcode/BitcodeReader.h"
28 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
29 #include "llvm/IR/DerivedTypes.h"
30 #include "llvm/IR/GlobalValue.h"
31 #include "llvm/IR/Value.h"
32 #include "llvm/Support/Format.h"
33 #include "llvm/Support/raw_ostream.h"
34 #include <cassert>
35 
36 using namespace clang;
37 using namespace CodeGen;
38 using namespace llvm::omp;
39 
40 namespace {
41 /// Base class for handling code generation inside OpenMP regions.
42 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
43 public:
44   /// Kinds of OpenMP regions used in codegen.
45   enum CGOpenMPRegionKind {
46     /// Region with outlined function for standalone 'parallel'
47     /// directive.
48     ParallelOutlinedRegion,
49     /// Region with outlined function for standalone 'task' directive.
50     TaskOutlinedRegion,
51     /// Region for constructs that do not require function outlining,
52     /// like 'for', 'sections', 'atomic' etc. directives.
53     InlinedRegion,
54     /// Region with outlined function for standalone 'target' directive.
55     TargetRegion,
56   };
57 
58   CGOpenMPRegionInfo(const CapturedStmt &CS,
59                      const CGOpenMPRegionKind RegionKind,
60                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
61                      bool HasCancel)
62       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
63         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
64 
65   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
66                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
67                      bool HasCancel)
68       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
69         Kind(Kind), HasCancel(HasCancel) {}
70 
71   /// Get a variable or parameter for storing global thread id
72   /// inside OpenMP construct.
73   virtual const VarDecl *getThreadIDVariable() const = 0;
74 
75   /// Emit the captured statement body.
76   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
77 
78   /// Get an LValue for the current ThreadID variable.
79   /// \return LValue for thread id variable. This LValue always has type int32*.
80   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
81 
82   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
83 
84   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
85 
86   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
87 
88   bool hasCancel() const { return HasCancel; }
89 
90   static bool classof(const CGCapturedStmtInfo *Info) {
91     return Info->getKind() == CR_OpenMP;
92   }
93 
94   ~CGOpenMPRegionInfo() override = default;
95 
96 protected:
97   CGOpenMPRegionKind RegionKind;
98   RegionCodeGenTy CodeGen;
99   OpenMPDirectiveKind Kind;
100   bool HasCancel;
101 };
102 
103 /// API for captured statement code generation in OpenMP constructs.
104 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
105 public:
106   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
107                              const RegionCodeGenTy &CodeGen,
108                              OpenMPDirectiveKind Kind, bool HasCancel,
109                              StringRef HelperName)
110       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
111                            HasCancel),
112         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
113     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
114   }
115 
116   /// Get a variable or parameter for storing global thread id
117   /// inside OpenMP construct.
118   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
119 
120   /// Get the name of the capture helper.
121   StringRef getHelperName() const override { return HelperName; }
122 
123   static bool classof(const CGCapturedStmtInfo *Info) {
124     return CGOpenMPRegionInfo::classof(Info) &&
125            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
126                ParallelOutlinedRegion;
127   }
128 
129 private:
130   /// A variable or parameter storing global thread id for OpenMP
131   /// constructs.
132   const VarDecl *ThreadIDVar;
133   StringRef HelperName;
134 };
135 
136 /// API for captured statement code generation in OpenMP constructs.
137 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
138 public:
139   class UntiedTaskActionTy final : public PrePostActionTy {
140     bool Untied;
141     const VarDecl *PartIDVar;
142     const RegionCodeGenTy UntiedCodeGen;
143     llvm::SwitchInst *UntiedSwitch = nullptr;
144 
145   public:
146     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
147                        const RegionCodeGenTy &UntiedCodeGen)
148         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
149     void Enter(CodeGenFunction &CGF) override {
150       if (Untied) {
151         // Emit task switching point.
152         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
153             CGF.GetAddrOfLocalVar(PartIDVar),
154             PartIDVar->getType()->castAs<PointerType>());
155         llvm::Value *Res =
156             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
157         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
158         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
159         CGF.EmitBlock(DoneBB);
160         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
161         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
162         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
163                               CGF.Builder.GetInsertBlock());
164         emitUntiedSwitch(CGF);
165       }
166     }
167     void emitUntiedSwitch(CodeGenFunction &CGF) const {
168       if (Untied) {
169         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
170             CGF.GetAddrOfLocalVar(PartIDVar),
171             PartIDVar->getType()->castAs<PointerType>());
172         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
173                               PartIdLVal);
174         UntiedCodeGen(CGF);
175         CodeGenFunction::JumpDest CurPoint =
176             CGF.getJumpDestInCurrentScope(".untied.next.");
177         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
178         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
179         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
180                               CGF.Builder.GetInsertBlock());
181         CGF.EmitBranchThroughCleanup(CurPoint);
182         CGF.EmitBlock(CurPoint.getBlock());
183       }
184     }
185     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
186   };
187   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
188                                  const VarDecl *ThreadIDVar,
189                                  const RegionCodeGenTy &CodeGen,
190                                  OpenMPDirectiveKind Kind, bool HasCancel,
191                                  const UntiedTaskActionTy &Action)
192       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
193         ThreadIDVar(ThreadIDVar), Action(Action) {
194     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
195   }
196 
197   /// Get a variable or parameter for storing global thread id
198   /// inside OpenMP construct.
199   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
200 
201   /// Get an LValue for the current ThreadID variable.
202   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
203 
204   /// Get the name of the capture helper.
205   StringRef getHelperName() const override { return ".omp_outlined."; }
206 
207   void emitUntiedSwitch(CodeGenFunction &CGF) override {
208     Action.emitUntiedSwitch(CGF);
209   }
210 
211   static bool classof(const CGCapturedStmtInfo *Info) {
212     return CGOpenMPRegionInfo::classof(Info) &&
213            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
214                TaskOutlinedRegion;
215   }
216 
217 private:
218   /// A variable or parameter storing global thread id for OpenMP
219   /// constructs.
220   const VarDecl *ThreadIDVar;
221   /// Action for emitting code for untied tasks.
222   const UntiedTaskActionTy &Action;
223 };
224 
225 /// API for inlined captured statement code generation in OpenMP
226 /// constructs.
227 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
228 public:
229   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
230                             const RegionCodeGenTy &CodeGen,
231                             OpenMPDirectiveKind Kind, bool HasCancel)
232       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
233         OldCSI(OldCSI),
234         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
235 
236   // Retrieve the value of the context parameter.
237   llvm::Value *getContextValue() const override {
238     if (OuterRegionInfo)
239       return OuterRegionInfo->getContextValue();
240     llvm_unreachable("No context value for inlined OpenMP region");
241   }
242 
243   void setContextValue(llvm::Value *V) override {
244     if (OuterRegionInfo) {
245       OuterRegionInfo->setContextValue(V);
246       return;
247     }
248     llvm_unreachable("No context value for inlined OpenMP region");
249   }
250 
251   /// Lookup the captured field decl for a variable.
252   const FieldDecl *lookup(const VarDecl *VD) const override {
253     if (OuterRegionInfo)
254       return OuterRegionInfo->lookup(VD);
255     // If there is no outer outlined region,no need to lookup in a list of
256     // captured variables, we can use the original one.
257     return nullptr;
258   }
259 
260   FieldDecl *getThisFieldDecl() const override {
261     if (OuterRegionInfo)
262       return OuterRegionInfo->getThisFieldDecl();
263     return nullptr;
264   }
265 
266   /// Get a variable or parameter for storing global thread id
267   /// inside OpenMP construct.
268   const VarDecl *getThreadIDVariable() const override {
269     if (OuterRegionInfo)
270       return OuterRegionInfo->getThreadIDVariable();
271     return nullptr;
272   }
273 
274   /// Get an LValue for the current ThreadID variable.
275   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
276     if (OuterRegionInfo)
277       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
278     llvm_unreachable("No LValue for inlined OpenMP construct");
279   }
280 
281   /// Get the name of the capture helper.
282   StringRef getHelperName() const override {
283     if (auto *OuterRegionInfo = getOldCSI())
284       return OuterRegionInfo->getHelperName();
285     llvm_unreachable("No helper name for inlined OpenMP construct");
286   }
287 
288   void emitUntiedSwitch(CodeGenFunction &CGF) override {
289     if (OuterRegionInfo)
290       OuterRegionInfo->emitUntiedSwitch(CGF);
291   }
292 
293   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
294 
295   static bool classof(const CGCapturedStmtInfo *Info) {
296     return CGOpenMPRegionInfo::classof(Info) &&
297            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
298   }
299 
300   ~CGOpenMPInlinedRegionInfo() override = default;
301 
302 private:
303   /// CodeGen info about outer OpenMP region.
304   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
305   CGOpenMPRegionInfo *OuterRegionInfo;
306 };
307 
308 /// API for captured statement code generation in OpenMP target
309 /// constructs. For this captures, implicit parameters are used instead of the
310 /// captured fields. The name of the target region has to be unique in a given
311 /// application so it is provided by the client, because only the client has
312 /// the information to generate that.
313 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
314 public:
315   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
316                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
317       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
318                            /*HasCancel=*/false),
319         HelperName(HelperName) {}
320 
321   /// This is unused for target regions because each starts executing
322   /// with a single thread.
323   const VarDecl *getThreadIDVariable() const override { return nullptr; }
324 
325   /// Get the name of the capture helper.
326   StringRef getHelperName() const override { return HelperName; }
327 
328   static bool classof(const CGCapturedStmtInfo *Info) {
329     return CGOpenMPRegionInfo::classof(Info) &&
330            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
331   }
332 
333 private:
334   StringRef HelperName;
335 };
336 
337 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
338   llvm_unreachable("No codegen for expressions");
339 }
340 /// API for generation of expressions captured in a innermost OpenMP
341 /// region.
342 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
343 public:
344   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
345       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
346                                   OMPD_unknown,
347                                   /*HasCancel=*/false),
348         PrivScope(CGF) {
349     // Make sure the globals captured in the provided statement are local by
350     // using the privatization logic. We assume the same variable is not
351     // captured more than once.
352     for (const auto &C : CS.captures()) {
353       if (!C.capturesVariable() && !C.capturesVariableByCopy())
354         continue;
355 
356       const VarDecl *VD = C.getCapturedVar();
357       if (VD->isLocalVarDeclOrParm())
358         continue;
359 
360       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
361                       /*RefersToEnclosingVariableOrCapture=*/false,
362                       VD->getType().getNonReferenceType(), VK_LValue,
363                       C.getLocation());
364       PrivScope.addPrivate(
365           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
366     }
367     (void)PrivScope.Privatize();
368   }
369 
370   /// Lookup the captured field decl for a variable.
371   const FieldDecl *lookup(const VarDecl *VD) const override {
372     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
373       return FD;
374     return nullptr;
375   }
376 
377   /// Emit the captured statement body.
378   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
379     llvm_unreachable("No body for expressions");
380   }
381 
382   /// Get a variable or parameter for storing global thread id
383   /// inside OpenMP construct.
384   const VarDecl *getThreadIDVariable() const override {
385     llvm_unreachable("No thread id for expressions");
386   }
387 
388   /// Get the name of the capture helper.
389   StringRef getHelperName() const override {
390     llvm_unreachable("No helper name for expressions");
391   }
392 
393   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
394 
395 private:
396   /// Private scope to capture global variables.
397   CodeGenFunction::OMPPrivateScope PrivScope;
398 };
399 
400 /// RAII for emitting code of OpenMP constructs.
401 class InlinedOpenMPRegionRAII {
402   CodeGenFunction &CGF;
403   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
404   FieldDecl *LambdaThisCaptureField = nullptr;
405   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
406 
407 public:
408   /// Constructs region for combined constructs.
409   /// \param CodeGen Code generation sequence for combined directives. Includes
410   /// a list of functions used for code generation of implicitly inlined
411   /// regions.
412   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
413                           OpenMPDirectiveKind Kind, bool HasCancel)
414       : CGF(CGF) {
415     // Start emission for the construct.
416     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
417         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
418     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
419     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
420     CGF.LambdaThisCaptureField = nullptr;
421     BlockInfo = CGF.BlockInfo;
422     CGF.BlockInfo = nullptr;
423   }
424 
425   ~InlinedOpenMPRegionRAII() {
426     // Restore original CapturedStmtInfo only if we're done with code emission.
427     auto *OldCSI =
428         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
429     delete CGF.CapturedStmtInfo;
430     CGF.CapturedStmtInfo = OldCSI;
431     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
432     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
433     CGF.BlockInfo = BlockInfo;
434   }
435 };
436 
437 /// Values for bit flags used in the ident_t to describe the fields.
438 /// All enumeric elements are named and described in accordance with the code
439 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
440 enum OpenMPLocationFlags : unsigned {
441   /// Use trampoline for internal microtask.
442   OMP_IDENT_IMD = 0x01,
443   /// Use c-style ident structure.
444   OMP_IDENT_KMPC = 0x02,
445   /// Atomic reduction option for kmpc_reduce.
446   OMP_ATOMIC_REDUCE = 0x10,
447   /// Explicit 'barrier' directive.
448   OMP_IDENT_BARRIER_EXPL = 0x20,
449   /// Implicit barrier in code.
450   OMP_IDENT_BARRIER_IMPL = 0x40,
451   /// Implicit barrier in 'for' directive.
452   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
453   /// Implicit barrier in 'sections' directive.
454   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
455   /// Implicit barrier in 'single' directive.
456   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
457   /// Call of __kmp_for_static_init for static loop.
458   OMP_IDENT_WORK_LOOP = 0x200,
459   /// Call of __kmp_for_static_init for sections.
460   OMP_IDENT_WORK_SECTIONS = 0x400,
461   /// Call of __kmp_for_static_init for distribute.
462   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
463   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
464 };
465 
466 namespace {
467 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
468 /// Values for bit flags for marking which requires clauses have been used.
469 enum OpenMPOffloadingRequiresDirFlags : int64_t {
470   /// flag undefined.
471   OMP_REQ_UNDEFINED               = 0x000,
472   /// no requires clause present.
473   OMP_REQ_NONE                    = 0x001,
474   /// reverse_offload clause.
475   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
476   /// unified_address clause.
477   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
478   /// unified_shared_memory clause.
479   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
480   /// dynamic_allocators clause.
481   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
482   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
483 };
484 
485 enum OpenMPOffloadingReservedDeviceIDs {
486   /// Device ID if the device was not defined, runtime should get it
487   /// from environment variables in the spec.
488   OMP_DEVICEID_UNDEF = -1,
489 };
490 } // anonymous namespace
491 
/// Indices of the fields of the ident structure, which describes a source
/// location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// reserved_1: might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// flags: OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// reserved_2: not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// reserved_3: source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// psource: string describing the source location. It is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
532 
/// Schedule kinds for 'omp for' loops. The enumerators mirror the sched_type
/// enum in kmp.h.
enum OpenMPSchedType {
  // Default (unordered) schedules.
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Static schedule with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,
  /// Schedule used by default: plain static.
  OMP_sch_default = OMP_sch_static,

  // 'ordered' schedules.
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  // dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // Support for the OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
564 
565 enum OpenMPRTLFunction {
566   /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
567   /// kmpc_micro microtask, ...);
568   OMPRTL__kmpc_fork_call,
569   /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
570   /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
571   OMPRTL__kmpc_threadprivate_cached,
572   /// Call to void __kmpc_threadprivate_register( ident_t *,
573   /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
574   OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
576   OMPRTL__kmpc_global_thread_num,
577   // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
578   // kmp_critical_name *crit);
579   OMPRTL__kmpc_critical,
580   // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
581   // global_tid, kmp_critical_name *crit, uintptr_t hint);
582   OMPRTL__kmpc_critical_with_hint,
583   // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
584   // kmp_critical_name *crit);
585   OMPRTL__kmpc_end_critical,
586   // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
587   // global_tid);
588   OMPRTL__kmpc_cancel_barrier,
589   // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
590   OMPRTL__kmpc_barrier,
591   // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
592   OMPRTL__kmpc_for_static_fini,
593   // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
594   // global_tid);
595   OMPRTL__kmpc_serialized_parallel,
596   // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
597   // global_tid);
598   OMPRTL__kmpc_end_serialized_parallel,
599   // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
600   // kmp_int32 num_threads);
601   OMPRTL__kmpc_push_num_threads,
602   // Call to void __kmpc_flush(ident_t *loc);
603   OMPRTL__kmpc_flush,
604   // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
605   OMPRTL__kmpc_master,
606   // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
607   OMPRTL__kmpc_end_master,
608   // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
609   // int end_part);
610   OMPRTL__kmpc_omp_taskyield,
611   // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
612   OMPRTL__kmpc_single,
613   // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
614   OMPRTL__kmpc_end_single,
615   // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
616   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
617   // kmp_routine_entry_t *task_entry);
618   OMPRTL__kmpc_omp_task_alloc,
619   // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
620   // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
621   // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
622   // kmp_int64 device_id);
623   OMPRTL__kmpc_omp_target_task_alloc,
624   // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
625   // new_task);
626   OMPRTL__kmpc_omp_task,
627   // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
628   // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
629   // kmp_int32 didit);
630   OMPRTL__kmpc_copyprivate,
631   // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
632   // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
633   // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
634   OMPRTL__kmpc_reduce,
635   // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
636   // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
637   // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
638   // *lck);
639   OMPRTL__kmpc_reduce_nowait,
640   // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
641   // kmp_critical_name *lck);
642   OMPRTL__kmpc_end_reduce,
643   // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
644   // kmp_critical_name *lck);
645   OMPRTL__kmpc_end_reduce_nowait,
646   // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
647   // kmp_task_t * new_task);
648   OMPRTL__kmpc_omp_task_begin_if0,
649   // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
650   // kmp_task_t * new_task);
651   OMPRTL__kmpc_omp_task_complete_if0,
652   // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
653   OMPRTL__kmpc_ordered,
654   // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
655   OMPRTL__kmpc_end_ordered,
656   // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
657   // global_tid);
658   OMPRTL__kmpc_omp_taskwait,
659   // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
660   OMPRTL__kmpc_taskgroup,
661   // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
662   OMPRTL__kmpc_end_taskgroup,
663   // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
664   // int proc_bind);
665   OMPRTL__kmpc_push_proc_bind,
666   // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
667   // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
668   // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
669   OMPRTL__kmpc_omp_task_with_deps,
670   // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
671   // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
672   // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
673   OMPRTL__kmpc_omp_wait_deps,
674   // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
675   // global_tid, kmp_int32 cncl_kind);
676   OMPRTL__kmpc_cancellationpoint,
677   // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
678   // kmp_int32 cncl_kind);
679   OMPRTL__kmpc_cancel,
680   // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
681   // kmp_int32 num_teams, kmp_int32 thread_limit);
682   OMPRTL__kmpc_push_num_teams,
683   // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
684   // microtask, ...);
685   OMPRTL__kmpc_fork_teams,
686   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
687   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
688   // sched, kmp_uint64 grainsize, void *task_dup);
689   OMPRTL__kmpc_taskloop,
690   // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
691   // num_dims, struct kmp_dim *dims);
692   OMPRTL__kmpc_doacross_init,
693   // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
694   OMPRTL__kmpc_doacross_fini,
695   // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
696   // *vec);
697   OMPRTL__kmpc_doacross_post,
698   // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
699   // *vec);
700   OMPRTL__kmpc_doacross_wait,
701   // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
702   // *data);
703   OMPRTL__kmpc_task_reduction_init,
704   // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
705   // *d);
706   OMPRTL__kmpc_task_reduction_get_th_data,
707   // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
708   OMPRTL__kmpc_alloc,
709   // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
710   OMPRTL__kmpc_free,
711 
712   //
713   // Offloading related calls
714   //
715   // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
716   // size);
717   OMPRTL__kmpc_push_target_tripcount,
718   // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
719   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
720   // *arg_types);
721   OMPRTL__tgt_target,
722   // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
723   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
724   // *arg_types);
725   OMPRTL__tgt_target_nowait,
726   // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
727   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
728   // *arg_types, int32_t num_teams, int32_t thread_limit);
729   OMPRTL__tgt_target_teams,
730   // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
731   // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
732   // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
733   OMPRTL__tgt_target_teams_nowait,
734   // Call to void __tgt_register_requires(int64_t flags);
735   OMPRTL__tgt_register_requires,
736   // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
737   OMPRTL__tgt_register_lib,
738   // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
739   OMPRTL__tgt_unregister_lib,
740   // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
741   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
742   OMPRTL__tgt_target_data_begin,
743   // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
744   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
745   // *arg_types);
746   OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
749   OMPRTL__tgt_target_data_end,
750   // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
751   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
752   // *arg_types);
753   OMPRTL__tgt_target_data_end_nowait,
754   // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
755   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
756   OMPRTL__tgt_target_data_update,
757   // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
758   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
759   // *arg_types);
760   OMPRTL__tgt_target_data_update_nowait,
761   // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
762   OMPRTL__tgt_mapper_num_components,
763   // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
764   // *base, void *begin, int64_t size, int64_t type);
765   OMPRTL__tgt_push_mapper_component,
766 };
767 
768 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
769 /// region.
770 class CleanupTy final : public EHScopeStack::Cleanup {
771   PrePostActionTy *Action;
772 
773 public:
774   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
775   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
776     if (!CGF.HaveInsertPoint())
777       return;
778     Action->Exit(CGF);
779   }
780 };
781 
782 } // anonymous namespace
783 
784 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
785   CodeGenFunction::RunCleanupsScope Scope(CGF);
786   if (PrePostAction) {
787     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
788     Callback(CodeGen, CGF, *PrePostAction);
789   } else {
790     PrePostActionTy Action;
791     Callback(CodeGen, CGF, Action);
792   }
793 }
794 
795 /// Check if the combiner is a call to UDR combiner and if it is so return the
796 /// UDR decl used for reduction.
797 static const OMPDeclareReductionDecl *
798 getReductionInit(const Expr *ReductionOp) {
799   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
800     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
801       if (const auto *DRE =
802               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
803         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
804           return DRD;
805   return nullptr;
806 }
807 
808 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
809                                              const OMPDeclareReductionDecl *DRD,
810                                              const Expr *InitOp,
811                                              Address Private, Address Original,
812                                              QualType Ty) {
813   if (DRD->getInitializer()) {
814     std::pair<llvm::Function *, llvm::Function *> Reduction =
815         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
816     const auto *CE = cast<CallExpr>(InitOp);
817     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
818     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
819     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
820     const auto *LHSDRE =
821         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
822     const auto *RHSDRE =
823         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
824     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
825     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
826                             [=]() { return Private; });
827     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
828                             [=]() { return Original; });
829     (void)PrivateScope.Privatize();
830     RValue Func = RValue::get(Reduction.second);
831     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
832     CGF.EmitIgnoredExpr(InitOp);
833   } else {
834     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
835     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
836     auto *GV = new llvm::GlobalVariable(
837         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
838         llvm::GlobalValue::PrivateLinkage, Init, Name);
839     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
840     RValue InitRVal;
841     switch (CGF.getEvaluationKind(Ty)) {
842     case TEK_Scalar:
843       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
844       break;
845     case TEK_Complex:
846       InitRVal =
847           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
848       break;
849     case TEK_Aggregate:
850       InitRVal = RValue::getAggregate(LV.getAddress(CGF));
851       break;
852     }
853     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
854     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
855     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
856                          /*IsInitializer=*/false);
857   }
858 }
859 
/// Emit element-by-element initialization of an array.
/// \param DestAddr Address of the array being initialized.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, every element is initialized via
/// emitInitWithReductionInitializer(); otherwise \p Init is evaluated into
/// every element.
/// \param Init Initial expression of array.
/// \param DRD User-defined reduction declaration, or null. When non-null, the
/// source array is walked in lock-step with the destination array.
/// \param SrcAddr Address of the original array; only used when \p DRD is
/// non-null.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  // NOTE(review): this casts DestAddr to its own element type, which looks
  // like a no-op -- confirm whether it is still needed.
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  // The source array is only tracked when a user-defined reduction is given.
  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop body entirely when the array has no elements.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // Each PHI starts at the beginning of its array (incoming edge from the
  // entry block) and later receives the advanced pointer from the block that
  // ends the loop body.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the initialization of the current element. The scope runs any
  // cleanups created for this element before the pointers are advanced.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the value name below says "dest" although this is the
    // next *source* element; left untouched because it only affects IR value
    // names.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
948 
949 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
950   return CGF.EmitOMPSharedLValue(E);
951 }
952 
953 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
954                                             const Expr *E) {
955   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
956     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
957   return LValue();
958 }
959 
960 void ReductionCodeGen::emitAggregateInitialization(
961     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
962     const OMPDeclareReductionDecl *DRD) {
963   // Emit VarDecl with copy init for arrays.
964   // Get the address of the original variable captured in current
965   // captured region.
966   const auto *PrivateVD =
967       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
968   bool EmitDeclareReductionInit =
969       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
970   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
971                        EmitDeclareReductionInit,
972                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
973                                                 : PrivateVD->getInit(),
974                        DRD, SharedLVal.getAddress(CGF));
975 }
976 
977 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
978                                    ArrayRef<const Expr *> Privates,
979                                    ArrayRef<const Expr *> ReductionOps) {
980   ClausesData.reserve(Shareds.size());
981   SharedAddresses.reserve(Shareds.size());
982   Sizes.reserve(Shareds.size());
983   BaseDecls.reserve(Shareds.size());
984   auto IPriv = Privates.begin();
985   auto IRed = ReductionOps.begin();
986   for (const Expr *Ref : Shareds) {
987     ClausesData.emplace_back(Ref, *IPriv, *IRed);
988     std::advance(IPriv, 1);
989     std::advance(IRed, 1);
990   }
991 }
992 
993 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
994   assert(SharedAddresses.size() == N &&
995          "Number of generated lvalues must be exactly N.");
996   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
997   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
998   SharedAddresses.emplace_back(First, Second);
999 }
1000 
1001 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
1002   const auto *PrivateVD =
1003       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1004   QualType PrivateType = PrivateVD->getType();
1005   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
1006   if (!PrivateType->isVariablyModifiedType()) {
1007     Sizes.emplace_back(
1008         CGF.getTypeSize(
1009             SharedAddresses[N].first.getType().getNonReferenceType()),
1010         nullptr);
1011     return;
1012   }
1013   llvm::Value *Size;
1014   llvm::Value *SizeInChars;
1015   auto *ElemType = cast<llvm::PointerType>(
1016                        SharedAddresses[N].first.getPointer(CGF)->getType())
1017                        ->getElementType();
1018   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
1019   if (AsArraySection) {
1020     Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF),
1021                                      SharedAddresses[N].first.getPointer(CGF));
1022     Size = CGF.Builder.CreateNUWAdd(
1023         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
1024     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
1025   } else {
1026     SizeInChars = CGF.getTypeSize(
1027         SharedAddresses[N].first.getType().getNonReferenceType());
1028     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
1029   }
1030   Sizes.emplace_back(SizeInChars, Size);
1031   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1032       CGF,
1033       cast<OpaqueValueExpr>(
1034           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1035       RValue::get(Size));
1036   CGF.EmitVariablyModifiedType(PrivateType);
1037 }
1038 
1039 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
1040                                          llvm::Value *Size) {
1041   const auto *PrivateVD =
1042       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1043   QualType PrivateType = PrivateVD->getType();
1044   if (!PrivateType->isVariablyModifiedType()) {
1045     assert(!Size && !Sizes[N].second &&
1046            "Size should be nullptr for non-variably modified reduction "
1047            "items.");
1048     return;
1049   }
1050   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1051       CGF,
1052       cast<OpaqueValueExpr>(
1053           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1054       RValue::get(Size));
1055   CGF.EmitVariablyModifiedType(PrivateType);
1056 }
1057 
1058 void ReductionCodeGen::emitInitialization(
1059     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1060     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1061   assert(SharedAddresses.size() > N && "No variable was generated");
1062   const auto *PrivateVD =
1063       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1064   const OMPDeclareReductionDecl *DRD =
1065       getReductionInit(ClausesData[N].ReductionOp);
1066   QualType PrivateType = PrivateVD->getType();
1067   PrivateAddr = CGF.Builder.CreateElementBitCast(
1068       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1069   QualType SharedType = SharedAddresses[N].first.getType();
1070   SharedLVal = CGF.MakeAddrLValue(
1071       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
1072                                        CGF.ConvertTypeForMem(SharedType)),
1073       SharedType, SharedAddresses[N].first.getBaseInfo(),
1074       CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1075   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1076     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1077   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1078     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1079                                      PrivateAddr, SharedLVal.getAddress(CGF),
1080                                      SharedLVal.getType());
1081   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1082              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1083     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1084                          PrivateVD->getType().getQualifiers(),
1085                          /*IsInitializer=*/false);
1086   }
1087 }
1088 
1089 bool ReductionCodeGen::needCleanups(unsigned N) {
1090   const auto *PrivateVD =
1091       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1092   QualType PrivateType = PrivateVD->getType();
1093   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1094   return DTorKind != QualType::DK_none;
1095 }
1096 
1097 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1098                                     Address PrivateAddr) {
1099   const auto *PrivateVD =
1100       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1101   QualType PrivateType = PrivateVD->getType();
1102   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1103   if (needCleanups(N)) {
1104     PrivateAddr = CGF.Builder.CreateElementBitCast(
1105         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1106     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1107   }
1108 }
1109 
1110 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1111                           LValue BaseLV) {
1112   BaseTy = BaseTy.getNonReferenceType();
1113   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1114          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1115     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
1116       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
1117     } else {
1118       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
1119       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1120     }
1121     BaseTy = BaseTy->getPointeeType();
1122   }
1123   return CGF.MakeAddrLValue(
1124       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
1125                                        CGF.ConvertTypeForMem(ElTy)),
1126       BaseLV.getType(), BaseLV.getBaseInfo(),
1127       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1128 }
1129 
1130 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1131                           llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
1132                           llvm::Value *Addr) {
1133   Address Tmp = Address::invalid();
1134   Address TopTmp = Address::invalid();
1135   Address MostTopTmp = Address::invalid();
1136   BaseTy = BaseTy.getNonReferenceType();
1137   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1138          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1139     Tmp = CGF.CreateMemTemp(BaseTy);
1140     if (TopTmp.isValid())
1141       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1142     else
1143       MostTopTmp = Tmp;
1144     TopTmp = Tmp;
1145     BaseTy = BaseTy->getPointeeType();
1146   }
1147   llvm::Type *Ty = BaseLVType;
1148   if (Tmp.isValid())
1149     Ty = Tmp.getElementType();
1150   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1151   if (Tmp.isValid()) {
1152     CGF.Builder.CreateStore(Addr, Tmp);
1153     return MostTopTmp;
1154   }
1155   return Address(Addr, BaseLVAlignment);
1156 }
1157 
1158 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1159   const VarDecl *OrigVD = nullptr;
1160   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1161     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1162     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1163       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1164     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1165       Base = TempASE->getBase()->IgnoreParenImpCasts();
1166     DE = cast<DeclRefExpr>(Base);
1167     OrigVD = cast<VarDecl>(DE->getDecl());
1168   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1169     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1170     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1171       Base = TempASE->getBase()->IgnoreParenImpCasts();
1172     DE = cast<DeclRefExpr>(Base);
1173     OrigVD = cast<VarDecl>(DE->getDecl());
1174   }
1175   return OrigVD;
1176 }
1177 
1178 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1179                                                Address PrivateAddr) {
1180   const DeclRefExpr *DE;
1181   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1182     BaseDecls.emplace_back(OrigVD);
1183     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1184     LValue BaseLValue =
1185         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1186                     OriginalBaseLValue);
1187     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1188         BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1189     llvm::Value *PrivatePointer =
1190         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1191             PrivateAddr.getPointer(),
1192             SharedAddresses[N].first.getAddress(CGF).getType());
1193     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1194     return castToBase(CGF, OrigVD->getType(),
1195                       SharedAddresses[N].first.getType(),
1196                       OriginalBaseLValue.getAddress(CGF).getType(),
1197                       OriginalBaseLValue.getAlignment(), Ptr);
1198   }
1199   BaseDecls.emplace_back(
1200       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1201   return PrivateAddr;
1202 }
1203 
1204 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1205   const OMPDeclareReductionDecl *DRD =
1206       getReductionInit(ClausesData[N].ReductionOp);
1207   return DRD && DRD->getInitializer();
1208 }
1209 
1210 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1211   return CGF.EmitLoadOfPointerLValue(
1212       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1213       getThreadIDVariable()->getType()->castAs<PointerType>());
1214 }
1215 
1216 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1217   if (!CGF.HaveInsertPoint())
1218     return;
1219   // 1.2.2 OpenMP Language Terminology
1220   // Structured block - An executable statement with a single entry at the
1221   // top and a single exit at the bottom.
1222   // The point of exit cannot be a branch out of the structured block.
1223   // longjmp() and throw() must not violate the entry/exit criteria.
1224   CGF.EHStack.pushTerminate();
1225   CodeGen(CGF);
1226   CGF.EHStack.popTerminate();
1227 }
1228 
1229 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1230     CodeGenFunction &CGF) {
1231   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1232                             getThreadIDVariable()->getType(),
1233                             AlignmentSource::Decl);
1234 }
1235 
1236 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1237                                        QualType FieldTy) {
1238   auto *Field = FieldDecl::Create(
1239       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1240       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1241       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1242   Field->setAccess(AS_public);
1243   DC->addDecl(Field);
1244   return Field;
1245 }
1246 
1247 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1248                                  StringRef Separator)
1249     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1250       OffloadEntriesInfoManager(CGM) {
1251   ASTContext &C = CGM.getContext();
1252   RecordDecl *RD = C.buildImplicitRecord("ident_t");
1253   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1254   RD->startDefinition();
1255   // reserved_1
1256   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1257   // flags
1258   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1259   // reserved_2
1260   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1261   // reserved_3
1262   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1263   // psource
1264   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1265   RD->completeDefinition();
1266   IdentQTy = C.getRecordType(RD);
1267   IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1268   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1269 
1270   loadOffloadInfoMetadata();
1271 }
1272 
1273 bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD,
1274                                             const GlobalDecl &OldGD,
1275                                             llvm::GlobalValue *OrigAddr,
1276                                             bool IsForDefinition) {
1277   // Emit at least a definition for the aliasee if the the address of the
1278   // original function is requested.
1279   if (IsForDefinition || OrigAddr)
1280     (void)CGM.GetAddrOfGlobal(NewGD);
1281   StringRef NewMangledName = CGM.getMangledName(NewGD);
1282   llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName);
1283   if (Addr && !Addr->isDeclaration()) {
1284     const auto *D = cast<FunctionDecl>(OldGD.getDecl());
1285     const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD);
1286     llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI);
1287 
1288     // Create a reference to the named value.  This ensures that it is emitted
1289     // if a deferred decl.
1290     llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD);
1291 
1292     // Create the new alias itself, but don't set a name yet.
1293     auto *GA =
1294         llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule());
1295 
1296     if (OrigAddr) {
1297       assert(OrigAddr->isDeclaration() && "Expected declaration");
1298 
1299       GA->takeName(OrigAddr);
1300       OrigAddr->replaceAllUsesWith(
1301           llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType()));
1302       OrigAddr->eraseFromParent();
1303     } else {
1304       GA->setName(CGM.getMangledName(OldGD));
1305     }
1306 
1307     // Set attributes which are particular to an alias; this is a
1308     // specialization of the attributes which may be set on a global function.
1309     if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() ||
1310         D->isWeakImported())
1311       GA->setLinkage(llvm::Function::WeakAnyLinkage);
1312 
1313     CGM.SetCommonAttributes(OldGD, GA);
1314     return true;
1315   }
1316   return false;
1317 }
1318 
1319 void CGOpenMPRuntime::clear() {
1320   InternalVars.clear();
1321   // Clean non-target variable declarations possibly used only in debug info.
1322   for (const auto &Data : EmittedNonTargetVariables) {
1323     if (!Data.getValue().pointsToAliveValue())
1324       continue;
1325     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1326     if (!GV)
1327       continue;
1328     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1329       continue;
1330     GV->eraseFromParent();
1331   }
1332   // Emit aliases for the deferred aliasees.
1333   for (const auto &Pair : DeferredVariantFunction) {
1334     StringRef MangledName = CGM.getMangledName(Pair.second.second);
1335     llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName);
1336     // If not able to emit alias, just emit original declaration.
1337     (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr,
1338                                 /*IsForDefinition=*/false);
1339   }
1340 }
1341 
1342 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1343   SmallString<128> Buffer;
1344   llvm::raw_svector_ostream OS(Buffer);
1345   StringRef Sep = FirstSeparator;
1346   for (StringRef Part : Parts) {
1347     OS << Sep << Part;
1348     Sep = Separator;
1349   }
1350   return OS.str();
1351 }
1352 
/// Emit the outlined combiner or initializer function of a user-defined
/// reduction.
/// \param Ty Type the reduction operates on; both parameters of the emitted
/// function are restrict-qualified pointers to it.
/// \param CombinerInitializer Expression emitted as the function body; may be
/// null, in which case no expression is emitted.
/// \param In Variable mapped to the pointee of the second function parameter.
/// \param Out Variable mapped to the pointee of the first function parameter.
/// \param IsCombiner True to emit a combiner, false to emit an initializer.
/// \returns The emitted function, which has internal linkage.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  // Note the parameter order: the 'out' parameter is pushed first below.
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The name is assembled from the runtime's separators; the trailing empty
  // part adds one more separator after the base name.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, force the helper to be inlined into its callers.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For an initializer, a non-trivial initializer of the 'Out' variable is
  // emitted into its remapped storage before the initializer expression.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1409 
1410 void CGOpenMPRuntime::emitUserDefinedReduction(
1411     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1412   if (UDRMap.count(D) > 0)
1413     return;
1414   llvm::Function *Combiner = emitCombinerOrInitializer(
1415       CGM, D->getType(), D->getCombiner(),
1416       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1417       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1418       /*IsCombiner=*/true);
1419   llvm::Function *Initializer = nullptr;
1420   if (const Expr *Init = D->getInitializer()) {
1421     Initializer = emitCombinerOrInitializer(
1422         CGM, D->getType(),
1423         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1424                                                                      : nullptr,
1425         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1426         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1427         /*IsCombiner=*/false);
1428   }
1429   UDRMap.try_emplace(D, Combiner, Initializer);
1430   if (CGF) {
1431     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1432     Decls.second.push_back(D);
1433   }
1434 }
1435 
1436 std::pair<llvm::Function *, llvm::Function *>
1437 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1438   auto I = UDRMap.find(D);
1439   if (I != UDRMap.end())
1440     return I->second;
1441   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1442   return UDRMap.lookup(D);
1443 }
1444 
1445 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1446 // Builder if one is present.
1447 struct PushAndPopStackRAII {
1448   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1449                       bool HasCancel)
1450       : OMPBuilder(OMPBuilder) {
1451     if (!OMPBuilder)
1452       return;
1453 
1454     // The following callback is the crucial part of clangs cleanup process.
1455     //
1456     // NOTE:
1457     // Once the OpenMPIRBuilder is used to create parallel regions (and
1458     // similar), the cancellation destination (Dest below) is determined via
1459     // IP. That means if we have variables to finalize we split the block at IP,
1460     // use the new block (=BB) as destination to build a JumpDest (via
1461     // getJumpDestInCurrentScope(BB)) which then is fed to
1462     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1463     // to push & pop an FinalizationInfo object.
1464     // The FiniCB will still be needed but at the point where the
1465     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1466     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1467       assert(IP.getBlock()->end() == IP.getPoint() &&
1468              "Clang CG should cause non-terminated block!");
1469       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1470       CGF.Builder.restoreIP(IP);
1471       CodeGenFunction::JumpDest Dest =
1472           CGF.getOMPCancelDestination(OMPD_parallel);
1473       CGF.EmitBranchThroughCleanup(Dest);
1474     };
1475 
1476     // TODO: Remove this once we emit parallel regions through the
1477     //       OpenMPIRBuilder as it can do this setup internally.
1478     llvm::OpenMPIRBuilder::FinalizationInfo FI(
1479         {FiniCB, OMPD_parallel, HasCancel});
1480     OMPBuilder->pushFinalizationCB(std::move(FI));
1481   }
1482   ~PushAndPopStackRAII() {
1483     if (OMPBuilder)
1484       OMPBuilder->popFinalizationCB();
1485   }
1486   llvm::OpenMPIRBuilder *OMPBuilder;
1487 };
1488 
1489 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1490     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1491     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1492     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1493   assert(ThreadIDVar->getType()->isPointerType() &&
1494          "thread id variable must be of type kmp_int32 *");
1495   CodeGenFunction CGF(CGM, true);
1496   bool HasCancel = false;
1497   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1498     HasCancel = OPD->hasCancel();
1499   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1500     HasCancel = OPSD->hasCancel();
1501   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1502     HasCancel = OPFD->hasCancel();
1503   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1504     HasCancel = OPFD->hasCancel();
1505   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1506     HasCancel = OPFD->hasCancel();
1507   else if (const auto *OPFD =
1508                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1509     HasCancel = OPFD->hasCancel();
1510   else if (const auto *OPFD =
1511                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1512     HasCancel = OPFD->hasCancel();
1513 
1514   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1515   //       parallel region to make cancellation barriers work properly.
1516   llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
1517   PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
1518   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1519                                     HasCancel, OutlinedHelperName);
1520   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1521   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1522 }
1523 
1524 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1525     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1526     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1527   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1528   return emitParallelOrTeamsOutlinedFunction(
1529       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1530 }
1531 
1532 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1533     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1534     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1535   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1536   return emitParallelOrTeamsOutlinedFunction(
1537       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1538 }
1539 
1540 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1541     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1542     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1543     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1544     bool Tied, unsigned &NumberOfParts) {
1545   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1546                                               PrePostActionTy &) {
1547     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1548     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1549     llvm::Value *TaskArgs[] = {
1550         UpLoc, ThreadID,
1551         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1552                                     TaskTVar->getType()->castAs<PointerType>())
1553             .getPointer(CGF)};
1554     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1555   };
1556   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1557                                                             UntiedCodeGen);
1558   CodeGen.setAction(Action);
1559   assert(!ThreadIDVar->getType()->isPointerType() &&
1560          "thread id variable must be of type kmp_int32 for tasks");
1561   const OpenMPDirectiveKind Region =
1562       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1563                                                       : OMPD_task;
1564   const CapturedStmt *CS = D.getCapturedStmt(Region);
1565   const auto *TD = dyn_cast<OMPTaskDirective>(&D);
1566   CodeGenFunction CGF(CGM, true);
1567   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1568                                         InnermostKind,
1569                                         TD ? TD->hasCancel() : false, Action);
1570   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1571   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1572   if (!Tied)
1573     NumberOfParts = Action.getNumberOfParts();
1574   return Res;
1575 }
1576 
1577 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1578                              const RecordDecl *RD, const CGRecordLayout &RL,
1579                              ArrayRef<llvm::Constant *> Data) {
1580   llvm::StructType *StructTy = RL.getLLVMType();
1581   unsigned PrevIdx = 0;
1582   ConstantInitBuilder CIBuilder(CGM);
1583   auto DI = Data.begin();
1584   for (const FieldDecl *FD : RD->fields()) {
1585     unsigned Idx = RL.getLLVMFieldNo(FD);
1586     // Fill the alignment.
1587     for (unsigned I = PrevIdx; I < Idx; ++I)
1588       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1589     PrevIdx = Idx + 1;
1590     Fields.add(*DI);
1591     ++DI;
1592   }
1593 }
1594 
1595 template <class... As>
1596 static llvm::GlobalVariable *
1597 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1598                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1599                    As &&... Args) {
1600   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1601   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1602   ConstantInitBuilder CIBuilder(CGM);
1603   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1604   buildStructValue(Fields, CGM, RD, RL, Data);
1605   return Fields.finishAndCreateGlobal(
1606       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1607       std::forward<As>(Args)...);
1608 }
1609 
1610 template <typename T>
1611 static void
1612 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1613                                          ArrayRef<llvm::Constant *> Data,
1614                                          T &Parent) {
1615   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1616   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1617   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1618   buildStructValue(Fields, CGM, RD, RL, Data);
1619   Fields.finishAndAddTo(Parent);
1620 }
1621 
1622 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1623   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1624   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1625   FlagsTy FlagsKey(Flags, Reserved2Flags);
1626   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1627   if (!Entry) {
1628     if (!DefaultOpenMPPSource) {
1629       // Initialize default location for psource field of ident_t structure of
1630       // all ident_t objects. Format is ";file;function;line;column;;".
1631       // Taken from
1632       // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1633       DefaultOpenMPPSource =
1634           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1635       DefaultOpenMPPSource =
1636           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1637     }
1638 
1639     llvm::Constant *Data[] = {
1640         llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1641         llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1642         llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1643         llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1644     llvm::GlobalValue *DefaultOpenMPLocation =
1645         createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1646                            llvm::GlobalValue::PrivateLinkage);
1647     DefaultOpenMPLocation->setUnnamedAddr(
1648         llvm::GlobalValue::UnnamedAddr::Global);
1649 
1650     OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1651   }
1652   return Address(Entry, Align);
1653 }
1654 
1655 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1656                                              bool AtCurrentPoint) {
1657   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1658   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1659 
1660   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1661   if (AtCurrentPoint) {
1662     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1663         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1664   } else {
1665     Elem.second.ServiceInsertPt =
1666         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1667     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1668   }
1669 }
1670 
1671 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1672   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1673   if (Elem.second.ServiceInsertPt) {
1674     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1675     Elem.second.ServiceInsertPt = nullptr;
1676     Ptr->eraseFromParent();
1677   }
1678 }
1679 
1680 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1681                                                  SourceLocation Loc,
1682                                                  unsigned Flags) {
1683   Flags |= OMP_IDENT_KMPC;
1684   // If no debug info is generated - return global default location.
1685   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1686       Loc.isInvalid())
1687     return getOrCreateDefaultLocation(Flags).getPointer();
1688 
1689   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1690 
1691   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1692   Address LocValue = Address::invalid();
1693   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1694   if (I != OpenMPLocThreadIDMap.end())
1695     LocValue = Address(I->second.DebugLoc, Align);
1696 
1697   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
1698   // GetOpenMPThreadID was called before this routine.
1699   if (!LocValue.isValid()) {
1700     // Generate "ident_t .kmpc_loc.addr;"
1701     Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
1702     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1703     Elem.second.DebugLoc = AI.getPointer();
1704     LocValue = AI;
1705 
1706     if (!Elem.second.ServiceInsertPt)
1707       setLocThreadIdInsertPt(CGF);
1708     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1709     CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1710     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1711                              CGF.getTypeSize(IdentQTy));
1712   }
1713 
1714   // char **psource = &.kmpc_loc_<flags>.addr.psource;
1715   LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
1716   auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
1717   LValue PSource =
1718       CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));
1719 
1720   llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1721   if (OMPDebugLoc == nullptr) {
1722     SmallString<128> Buffer2;
1723     llvm::raw_svector_ostream OS2(Buffer2);
1724     // Build debug location
1725     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1726     OS2 << ";" << PLoc.getFilename() << ";";
1727     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1728       OS2 << FD->getQualifiedNameAsString();
1729     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1730     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1731     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1732   }
1733   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1734   CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);
1735 
1736   // Our callers always pass this to a runtime function, so for
1737   // convenience, go ahead and return a naked pointer.
1738   return LocValue.getPointer();
1739 }
1740 
1741 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1742                                           SourceLocation Loc) {
1743   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1744 
1745   llvm::Value *ThreadID = nullptr;
1746   // Check whether we've already cached a load of the thread id in this
1747   // function.
1748   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1749   if (I != OpenMPLocThreadIDMap.end()) {
1750     ThreadID = I->second.ThreadID;
1751     if (ThreadID != nullptr)
1752       return ThreadID;
1753   }
1754   // If exceptions are enabled, do not use parameter to avoid possible crash.
1755   if (auto *OMPRegionInfo =
1756           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1757     if (OMPRegionInfo->getThreadIDVariable()) {
1758       // Check if this an outlined function with thread id passed as argument.
1759       LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1760       llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1761       if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1762           !CGF.getLangOpts().CXXExceptions ||
1763           CGF.Builder.GetInsertBlock() == TopBlock ||
1764           !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1765           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1766               TopBlock ||
1767           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1768               CGF.Builder.GetInsertBlock()) {
1769         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1770         // If value loaded in entry block, cache it and use it everywhere in
1771         // function.
1772         if (CGF.Builder.GetInsertBlock() == TopBlock) {
1773           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1774           Elem.second.ThreadID = ThreadID;
1775         }
1776         return ThreadID;
1777       }
1778     }
1779   }
1780 
1781   // This is not an outlined function region - need to call __kmpc_int32
1782   // kmpc_global_thread_num(ident_t *loc).
1783   // Generate thread id value and cache this value for use across the
1784   // function.
1785   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1786   if (!Elem.second.ServiceInsertPt)
1787     setLocThreadIdInsertPt(CGF);
1788   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1789   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1790   llvm::CallInst *Call = CGF.Builder.CreateCall(
1791       createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1792       emitUpdateLocation(CGF, Loc));
1793   Call->setCallingConv(CGF.getRuntimeCC());
1794   Elem.second.ThreadID = Call;
1795   return Call;
1796 }
1797 
1798 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1799   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1800   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1801     clearLocThreadIdInsertPt(CGF);
1802     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1803   }
1804   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1805     for(auto *D : FunctionUDRMap[CGF.CurFn])
1806       UDRMap.erase(D);
1807     FunctionUDRMap.erase(CGF.CurFn);
1808   }
1809   auto I = FunctionUDMMap.find(CGF.CurFn);
1810   if (I != FunctionUDMMap.end()) {
1811     for(auto *D : I->second)
1812       UDMMap.erase(D);
1813     FunctionUDMMap.erase(I);
1814   }
1815 }
1816 
1817 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1818   return IdentTy->getPointerTo();
1819 }
1820 
1821 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1822   if (!Kmpc_MicroTy) {
1823     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1824     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1825                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1826     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1827   }
1828   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1829 }
1830 
1831 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1832   llvm::FunctionCallee RTLFn = nullptr;
1833   switch (static_cast<OpenMPRTLFunction>(Function)) {
1834   case OMPRTL__kmpc_fork_call: {
1835     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1836     // microtask, ...);
1837     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1838                                 getKmpc_MicroPointerTy()};
1839     auto *FnTy =
1840         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1841     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1842     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1843       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1844         llvm::LLVMContext &Ctx = F->getContext();
1845         llvm::MDBuilder MDB(Ctx);
1846         // Annotate the callback behavior of the __kmpc_fork_call:
1847         //  - The callback callee is argument number 2 (microtask).
1848         //  - The first two arguments of the callback callee are unknown (-1).
1849         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1850         //    callback callee.
1851         F->addMetadata(
1852             llvm::LLVMContext::MD_callback,
1853             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1854                                         2, {-1, -1},
1855                                         /* VarArgsArePassed */ true)}));
1856       }
1857     }
1858     break;
1859   }
1860   case OMPRTL__kmpc_global_thread_num: {
1861     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1862     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1863     auto *FnTy =
1864         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1865     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1866     break;
1867   }
1868   case OMPRTL__kmpc_threadprivate_cached: {
1869     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1870     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1871     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1872                                 CGM.VoidPtrTy, CGM.SizeTy,
1873                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1874     auto *FnTy =
1875         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1876     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1877     break;
1878   }
1879   case OMPRTL__kmpc_critical: {
1880     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1881     // kmp_critical_name *crit);
1882     llvm::Type *TypeParams[] = {
1883         getIdentTyPointerTy(), CGM.Int32Ty,
1884         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1885     auto *FnTy =
1886         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1887     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1888     break;
1889   }
1890   case OMPRTL__kmpc_critical_with_hint: {
1891     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1892     // kmp_critical_name *crit, uintptr_t hint);
1893     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1894                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1895                                 CGM.IntPtrTy};
1896     auto *FnTy =
1897         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1898     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1899     break;
1900   }
1901   case OMPRTL__kmpc_threadprivate_register: {
1902     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1903     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1904     // typedef void *(*kmpc_ctor)(void *);
1905     auto *KmpcCtorTy =
1906         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1907                                 /*isVarArg*/ false)->getPointerTo();
1908     // typedef void *(*kmpc_cctor)(void *, void *);
1909     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1910     auto *KmpcCopyCtorTy =
1911         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1912                                 /*isVarArg*/ false)
1913             ->getPointerTo();
1914     // typedef void (*kmpc_dtor)(void *);
1915     auto *KmpcDtorTy =
1916         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1917             ->getPointerTo();
1918     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1919                               KmpcCopyCtorTy, KmpcDtorTy};
1920     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1921                                         /*isVarArg*/ false);
1922     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1923     break;
1924   }
1925   case OMPRTL__kmpc_end_critical: {
1926     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1927     // kmp_critical_name *crit);
1928     llvm::Type *TypeParams[] = {
1929         getIdentTyPointerTy(), CGM.Int32Ty,
1930         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1931     auto *FnTy =
1932         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1933     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1934     break;
1935   }
1936   case OMPRTL__kmpc_cancel_barrier: {
1937     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1938     // global_tid);
1939     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1940     auto *FnTy =
1941         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1942     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1943     break;
1944   }
1945   case OMPRTL__kmpc_barrier: {
1946     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1947     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1948     auto *FnTy =
1949         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1950     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1951     break;
1952   }
1953   case OMPRTL__kmpc_for_static_fini: {
1954     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1955     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1956     auto *FnTy =
1957         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1958     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1959     break;
1960   }
1961   case OMPRTL__kmpc_push_num_threads: {
1962     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1963     // kmp_int32 num_threads)
1964     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1965                                 CGM.Int32Ty};
1966     auto *FnTy =
1967         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1968     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1969     break;
1970   }
1971   case OMPRTL__kmpc_serialized_parallel: {
1972     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1973     // global_tid);
1974     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1975     auto *FnTy =
1976         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1977     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1978     break;
1979   }
1980   case OMPRTL__kmpc_end_serialized_parallel: {
1981     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1982     // global_tid);
1983     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1984     auto *FnTy =
1985         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1986     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1987     break;
1988   }
1989   case OMPRTL__kmpc_flush: {
1990     // Build void __kmpc_flush(ident_t *loc);
1991     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1992     auto *FnTy =
1993         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1994     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1995     break;
1996   }
1997   case OMPRTL__kmpc_master: {
1998     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1999     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2000     auto *FnTy =
2001         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2002     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
2003     break;
2004   }
2005   case OMPRTL__kmpc_end_master: {
2006     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
2007     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2008     auto *FnTy =
2009         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2010     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
2011     break;
2012   }
2013   case OMPRTL__kmpc_omp_taskyield: {
2014     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
2015     // int end_part);
2016     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2017     auto *FnTy =
2018         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2019     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
2020     break;
2021   }
2022   case OMPRTL__kmpc_single: {
2023     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
2024     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2025     auto *FnTy =
2026         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2027     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
2028     break;
2029   }
2030   case OMPRTL__kmpc_end_single: {
2031     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
2032     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2033     auto *FnTy =
2034         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2035     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
2036     break;
2037   }
2038   case OMPRTL__kmpc_omp_task_alloc: {
2039     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2040     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2041     // kmp_routine_entry_t *task_entry);
2042     assert(KmpRoutineEntryPtrTy != nullptr &&
2043            "Type kmp_routine_entry_t must be created.");
2044     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2045                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
2046     // Return void * and then cast to particular kmp_task_t type.
2047     auto *FnTy =
2048         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2049     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
2050     break;
2051   }
2052   case OMPRTL__kmpc_omp_target_task_alloc: {
2053     // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
2054     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2055     // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
2056     assert(KmpRoutineEntryPtrTy != nullptr &&
2057            "Type kmp_routine_entry_t must be created.");
2058     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2059                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
2060                                 CGM.Int64Ty};
2061     // Return void * and then cast to particular kmp_task_t type.
2062     auto *FnTy =
2063         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2064     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
2065     break;
2066   }
2067   case OMPRTL__kmpc_omp_task: {
2068     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2069     // *new_task);
2070     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2071                                 CGM.VoidPtrTy};
2072     auto *FnTy =
2073         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2074     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
2075     break;
2076   }
2077   case OMPRTL__kmpc_copyprivate: {
2078     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
2079     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
2080     // kmp_int32 didit);
2081     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2082     auto *CpyFnTy =
2083         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
2084     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
2085                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
2086                                 CGM.Int32Ty};
2087     auto *FnTy =
2088         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2089     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
2090     break;
2091   }
2092   case OMPRTL__kmpc_reduce: {
2093     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
2094     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
2095     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
2096     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2097     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2098                                                /*isVarArg=*/false);
2099     llvm::Type *TypeParams[] = {
2100         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2101         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2102         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2103     auto *FnTy =
2104         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2105     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
2106     break;
2107   }
2108   case OMPRTL__kmpc_reduce_nowait: {
2109     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
2110     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2111     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
2112     // *lck);
2113     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2114     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2115                                                /*isVarArg=*/false);
2116     llvm::Type *TypeParams[] = {
2117         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2118         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2119         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2120     auto *FnTy =
2121         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2122     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
2123     break;
2124   }
2125   case OMPRTL__kmpc_end_reduce: {
2126     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2127     // kmp_critical_name *lck);
2128     llvm::Type *TypeParams[] = {
2129         getIdentTyPointerTy(), CGM.Int32Ty,
2130         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2131     auto *FnTy =
2132         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2133     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2134     break;
2135   }
2136   case OMPRTL__kmpc_end_reduce_nowait: {
    // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
2139     llvm::Type *TypeParams[] = {
2140         getIdentTyPointerTy(), CGM.Int32Ty,
2141         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2142     auto *FnTy =
2143         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2144     RTLFn =
2145         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2146     break;
2147   }
2148   case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
2151     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2152                                 CGM.VoidPtrTy};
2153     auto *FnTy =
2154         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2155     RTLFn =
2156         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2157     break;
2158   }
2159   case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
2162     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2163                                 CGM.VoidPtrTy};
2164     auto *FnTy =
2165         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2166     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2167                                       /*Name=*/"__kmpc_omp_task_complete_if0");
2168     break;
2169   }
2170   case OMPRTL__kmpc_ordered: {
2171     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2172     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2173     auto *FnTy =
2174         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2175     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2176     break;
2177   }
2178   case OMPRTL__kmpc_end_ordered: {
2179     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2180     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2181     auto *FnTy =
2182         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2183     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2184     break;
2185   }
2186   case OMPRTL__kmpc_omp_taskwait: {
2187     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2188     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2189     auto *FnTy =
2190         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2191     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2192     break;
2193   }
2194   case OMPRTL__kmpc_taskgroup: {
2195     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2196     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2197     auto *FnTy =
2198         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2199     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2200     break;
2201   }
2202   case OMPRTL__kmpc_end_taskgroup: {
2203     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2204     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2205     auto *FnTy =
2206         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2207     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2208     break;
2209   }
2210   case OMPRTL__kmpc_push_proc_bind: {
2211     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2212     // int proc_bind)
2213     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2214     auto *FnTy =
2215         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2216     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2217     break;
2218   }
2219   case OMPRTL__kmpc_omp_task_with_deps: {
2220     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2221     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2222     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2223     llvm::Type *TypeParams[] = {
2224         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2225         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2226     auto *FnTy =
2227         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2228     RTLFn =
2229         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2230     break;
2231   }
2232   case OMPRTL__kmpc_omp_wait_deps: {
2233     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2234     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2235     // kmp_depend_info_t *noalias_dep_list);
2236     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2237                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2238                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2239     auto *FnTy =
2240         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2241     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2242     break;
2243   }
2244   case OMPRTL__kmpc_cancellationpoint: {
2245     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2246     // global_tid, kmp_int32 cncl_kind)
2247     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2248     auto *FnTy =
2249         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2250     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2251     break;
2252   }
2253   case OMPRTL__kmpc_cancel: {
2254     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2255     // kmp_int32 cncl_kind)
2256     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2257     auto *FnTy =
2258         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2259     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2260     break;
2261   }
2262   case OMPRTL__kmpc_push_num_teams: {
    // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_teams, kmp_int32 num_threads)
2265     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2266         CGM.Int32Ty};
2267     auto *FnTy =
2268         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2269     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2270     break;
2271   }
2272   case OMPRTL__kmpc_fork_teams: {
2273     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2274     // microtask, ...);
2275     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2276                                 getKmpc_MicroPointerTy()};
2277     auto *FnTy =
2278         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2279     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2280     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2281       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2282         llvm::LLVMContext &Ctx = F->getContext();
2283         llvm::MDBuilder MDB(Ctx);
2284         // Annotate the callback behavior of the __kmpc_fork_teams:
2285         //  - The callback callee is argument number 2 (microtask).
2286         //  - The first two arguments of the callback callee are unknown (-1).
2287         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2288         //    callback callee.
2289         F->addMetadata(
2290             llvm::LLVMContext::MD_callback,
2291             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2292                                         2, {-1, -1},
2293                                         /* VarArgsArePassed */ true)}));
2294       }
2295     }
2296     break;
2297   }
2298   case OMPRTL__kmpc_taskloop: {
2299     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2300     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2301     // sched, kmp_uint64 grainsize, void *task_dup);
2302     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2303                                 CGM.IntTy,
2304                                 CGM.VoidPtrTy,
2305                                 CGM.IntTy,
2306                                 CGM.Int64Ty->getPointerTo(),
2307                                 CGM.Int64Ty->getPointerTo(),
2308                                 CGM.Int64Ty,
2309                                 CGM.IntTy,
2310                                 CGM.IntTy,
2311                                 CGM.Int64Ty,
2312                                 CGM.VoidPtrTy};
2313     auto *FnTy =
2314         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2315     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2316     break;
2317   }
2318   case OMPRTL__kmpc_doacross_init: {
2319     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2320     // num_dims, struct kmp_dim *dims);
2321     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2322                                 CGM.Int32Ty,
2323                                 CGM.Int32Ty,
2324                                 CGM.VoidPtrTy};
2325     auto *FnTy =
2326         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2327     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2328     break;
2329   }
2330   case OMPRTL__kmpc_doacross_fini: {
2331     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2332     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2333     auto *FnTy =
2334         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2335     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2336     break;
2337   }
2338   case OMPRTL__kmpc_doacross_post: {
2339     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2340     // *vec);
2341     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2342                                 CGM.Int64Ty->getPointerTo()};
2343     auto *FnTy =
2344         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2345     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2346     break;
2347   }
2348   case OMPRTL__kmpc_doacross_wait: {
2349     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2350     // *vec);
2351     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2352                                 CGM.Int64Ty->getPointerTo()};
2353     auto *FnTy =
2354         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2355     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2356     break;
2357   }
2358   case OMPRTL__kmpc_task_reduction_init: {
2359     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2360     // *data);
2361     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2362     auto *FnTy =
2363         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2364     RTLFn =
2365         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2366     break;
2367   }
2368   case OMPRTL__kmpc_task_reduction_get_th_data: {
2369     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2370     // *d);
2371     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2372     auto *FnTy =
2373         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2374     RTLFn = CGM.CreateRuntimeFunction(
2375         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2376     break;
2377   }
2378   case OMPRTL__kmpc_alloc: {
2379     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2380     // al); omp_allocator_handle_t type is void *.
2381     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2382     auto *FnTy =
2383         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2384     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2385     break;
2386   }
2387   case OMPRTL__kmpc_free: {
2388     // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2389     // al); omp_allocator_handle_t type is void *.
2390     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2391     auto *FnTy =
2392         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2393     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2394     break;
2395   }
2396   case OMPRTL__kmpc_push_target_tripcount: {
2397     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2398     // size);
2399     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2400     llvm::FunctionType *FnTy =
2401         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2402     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2403     break;
2404   }
2405   case OMPRTL__tgt_target: {
2406     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2407     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2408     // *arg_types);
2409     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2410                                 CGM.VoidPtrTy,
2411                                 CGM.Int32Ty,
2412                                 CGM.VoidPtrPtrTy,
2413                                 CGM.VoidPtrPtrTy,
2414                                 CGM.Int64Ty->getPointerTo(),
2415                                 CGM.Int64Ty->getPointerTo()};
2416     auto *FnTy =
2417         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2418     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2419     break;
2420   }
2421   case OMPRTL__tgt_target_nowait: {
2422     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2423     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2424     // int64_t *arg_types);
2425     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2426                                 CGM.VoidPtrTy,
2427                                 CGM.Int32Ty,
2428                                 CGM.VoidPtrPtrTy,
2429                                 CGM.VoidPtrPtrTy,
2430                                 CGM.Int64Ty->getPointerTo(),
2431                                 CGM.Int64Ty->getPointerTo()};
2432     auto *FnTy =
2433         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2434     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2435     break;
2436   }
2437   case OMPRTL__tgt_target_teams: {
2438     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2439     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2440     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2441     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2442                                 CGM.VoidPtrTy,
2443                                 CGM.Int32Ty,
2444                                 CGM.VoidPtrPtrTy,
2445                                 CGM.VoidPtrPtrTy,
2446                                 CGM.Int64Ty->getPointerTo(),
2447                                 CGM.Int64Ty->getPointerTo(),
2448                                 CGM.Int32Ty,
2449                                 CGM.Int32Ty};
2450     auto *FnTy =
2451         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2452     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2453     break;
2454   }
2455   case OMPRTL__tgt_target_teams_nowait: {
2456     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2457     // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2458     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2459     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2460                                 CGM.VoidPtrTy,
2461                                 CGM.Int32Ty,
2462                                 CGM.VoidPtrPtrTy,
2463                                 CGM.VoidPtrPtrTy,
2464                                 CGM.Int64Ty->getPointerTo(),
2465                                 CGM.Int64Ty->getPointerTo(),
2466                                 CGM.Int32Ty,
2467                                 CGM.Int32Ty};
2468     auto *FnTy =
2469         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2470     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2471     break;
2472   }
2473   case OMPRTL__tgt_register_requires: {
2474     // Build void __tgt_register_requires(int64_t flags);
2475     llvm::Type *TypeParams[] = {CGM.Int64Ty};
2476     auto *FnTy =
2477         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2478     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2479     break;
2480   }
2481   case OMPRTL__tgt_register_lib: {
2482     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2483     QualType ParamTy =
2484         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2485     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2486     auto *FnTy =
2487         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2488     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2489     break;
2490   }
2491   case OMPRTL__tgt_unregister_lib: {
2492     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2493     QualType ParamTy =
2494         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2495     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2496     auto *FnTy =
2497         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2498     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2499     break;
2500   }
2501   case OMPRTL__tgt_target_data_begin: {
2502     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2503     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2504     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2505                                 CGM.Int32Ty,
2506                                 CGM.VoidPtrPtrTy,
2507                                 CGM.VoidPtrPtrTy,
2508                                 CGM.Int64Ty->getPointerTo(),
2509                                 CGM.Int64Ty->getPointerTo()};
2510     auto *FnTy =
2511         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2512     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2513     break;
2514   }
2515   case OMPRTL__tgt_target_data_begin_nowait: {
2516     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2517     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2518     // *arg_types);
2519     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2520                                 CGM.Int32Ty,
2521                                 CGM.VoidPtrPtrTy,
2522                                 CGM.VoidPtrPtrTy,
2523                                 CGM.Int64Ty->getPointerTo(),
2524                                 CGM.Int64Ty->getPointerTo()};
2525     auto *FnTy =
2526         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2527     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2528     break;
2529   }
2530   case OMPRTL__tgt_target_data_end: {
2531     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2532     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2533     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2534                                 CGM.Int32Ty,
2535                                 CGM.VoidPtrPtrTy,
2536                                 CGM.VoidPtrPtrTy,
2537                                 CGM.Int64Ty->getPointerTo(),
2538                                 CGM.Int64Ty->getPointerTo()};
2539     auto *FnTy =
2540         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2541     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2542     break;
2543   }
2544   case OMPRTL__tgt_target_data_end_nowait: {
2545     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2546     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2547     // *arg_types);
2548     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2549                                 CGM.Int32Ty,
2550                                 CGM.VoidPtrPtrTy,
2551                                 CGM.VoidPtrPtrTy,
2552                                 CGM.Int64Ty->getPointerTo(),
2553                                 CGM.Int64Ty->getPointerTo()};
2554     auto *FnTy =
2555         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2556     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2557     break;
2558   }
2559   case OMPRTL__tgt_target_data_update: {
2560     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2561     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2562     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2563                                 CGM.Int32Ty,
2564                                 CGM.VoidPtrPtrTy,
2565                                 CGM.VoidPtrPtrTy,
2566                                 CGM.Int64Ty->getPointerTo(),
2567                                 CGM.Int64Ty->getPointerTo()};
2568     auto *FnTy =
2569         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2570     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2571     break;
2572   }
2573   case OMPRTL__tgt_target_data_update_nowait: {
2574     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2575     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2576     // *arg_types);
2577     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2578                                 CGM.Int32Ty,
2579                                 CGM.VoidPtrPtrTy,
2580                                 CGM.VoidPtrPtrTy,
2581                                 CGM.Int64Ty->getPointerTo(),
2582                                 CGM.Int64Ty->getPointerTo()};
2583     auto *FnTy =
2584         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2585     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2586     break;
2587   }
2588   case OMPRTL__tgt_mapper_num_components: {
2589     // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
2590     llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
2591     auto *FnTy =
2592         llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
2593     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
2594     break;
2595   }
2596   case OMPRTL__tgt_push_mapper_component: {
2597     // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
2598     // *base, void *begin, int64_t size, int64_t type);
2599     llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
2600                                 CGM.Int64Ty, CGM.Int64Ty};
2601     auto *FnTy =
2602         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2603     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
2604     break;
2605   }
2606   }
2607   assert(RTLFn && "Unable to find OpenMP runtime function");
2608   return RTLFn;
2609 }
2610 
2611 llvm::FunctionCallee
2612 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2613   assert((IVSize == 32 || IVSize == 64) &&
2614          "IV size is not compatible with the omp runtime");
2615   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2616                                             : "__kmpc_for_static_init_4u")
2617                                 : (IVSigned ? "__kmpc_for_static_init_8"
2618                                             : "__kmpc_for_static_init_8u");
2619   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2620   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2621   llvm::Type *TypeParams[] = {
2622     getIdentTyPointerTy(),                     // loc
2623     CGM.Int32Ty,                               // tid
2624     CGM.Int32Ty,                               // schedtype
2625     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2626     PtrTy,                                     // p_lower
2627     PtrTy,                                     // p_upper
2628     PtrTy,                                     // p_stride
2629     ITy,                                       // incr
2630     ITy                                        // chunk
2631   };
2632   auto *FnTy =
2633       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2634   return CGM.CreateRuntimeFunction(FnTy, Name);
2635 }
2636 
2637 llvm::FunctionCallee
2638 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2639   assert((IVSize == 32 || IVSize == 64) &&
2640          "IV size is not compatible with the omp runtime");
2641   StringRef Name =
2642       IVSize == 32
2643           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2644           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2645   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2646   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2647                                CGM.Int32Ty,           // tid
2648                                CGM.Int32Ty,           // schedtype
2649                                ITy,                   // lower
2650                                ITy,                   // upper
2651                                ITy,                   // stride
2652                                ITy                    // chunk
2653   };
2654   auto *FnTy =
2655       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2656   return CGM.CreateRuntimeFunction(FnTy, Name);
2657 }
2658 
2659 llvm::FunctionCallee
2660 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2661   assert((IVSize == 32 || IVSize == 64) &&
2662          "IV size is not compatible with the omp runtime");
2663   StringRef Name =
2664       IVSize == 32
2665           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2666           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2667   llvm::Type *TypeParams[] = {
2668       getIdentTyPointerTy(), // loc
2669       CGM.Int32Ty,           // tid
2670   };
2671   auto *FnTy =
2672       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2673   return CGM.CreateRuntimeFunction(FnTy, Name);
2674 }
2675 
2676 llvm::FunctionCallee
2677 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2678   assert((IVSize == 32 || IVSize == 64) &&
2679          "IV size is not compatible with the omp runtime");
2680   StringRef Name =
2681       IVSize == 32
2682           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2683           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2684   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2685   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2686   llvm::Type *TypeParams[] = {
2687     getIdentTyPointerTy(),                     // loc
2688     CGM.Int32Ty,                               // tid
2689     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2690     PtrTy,                                     // p_lower
2691     PtrTy,                                     // p_upper
2692     PtrTy                                      // p_stride
2693   };
2694   auto *FnTy =
2695       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2696   return CGM.CreateRuntimeFunction(FnTy, Name);
2697 }
2698 
2699 /// Obtain information that uniquely identifies a target entry. This
2700 /// consists of the file and device IDs as well as line number associated with
2701 /// the relevant entry source location.
2702 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2703                                      unsigned &DeviceID, unsigned &FileID,
2704                                      unsigned &LineNum) {
2705   SourceManager &SM = C.getSourceManager();
2706 
2707   // The loc should be always valid and have a file ID (the user cannot use
2708   // #pragma directives in macros)
2709 
2710   assert(Loc.isValid() && "Source location is expected to be always valid.");
2711 
2712   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2713   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2714 
2715   llvm::sys::fs::UniqueID ID;
2716   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2717     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2718         << PLoc.getFilename() << EC.message();
2719 
2720   DeviceID = ID.getDevice();
2721   FileID = ID.getFile();
2722   LineNum = PLoc.getLine();
2723 }
2724 
2725 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
2726   if (CGM.getLangOpts().OpenMPSimd)
2727     return Address::invalid();
2728   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2729       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2730   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
2731               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2732                HasRequiresUnifiedSharedMemory))) {
2733     SmallString<64> PtrName;
2734     {
2735       llvm::raw_svector_ostream OS(PtrName);
2736       OS << CGM.getMangledName(GlobalDecl(VD));
2737       if (!VD->isExternallyVisible()) {
2738         unsigned DeviceID, FileID, Line;
2739         getTargetEntryUniqueInfo(CGM.getContext(),
2740                                  VD->getCanonicalDecl()->getBeginLoc(),
2741                                  DeviceID, FileID, Line);
2742         OS << llvm::format("_%x", FileID);
2743       }
2744       OS << "_decl_tgt_ref_ptr";
2745     }
2746     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2747     if (!Ptr) {
2748       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2749       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2750                                         PtrName);
2751 
2752       auto *GV = cast<llvm::GlobalVariable>(Ptr);
2753       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
2754 
2755       if (!CGM.getLangOpts().OpenMPIsDevice)
2756         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2757       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2758     }
2759     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2760   }
2761   return Address::invalid();
2762 }
2763 
2764 llvm::Constant *
2765 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2766   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2767          !CGM.getContext().getTargetInfo().isTLSSupported());
2768   // Lookup the entry, lazily creating it if necessary.
2769   std::string Suffix = getName({"cache", ""});
2770   return getOrCreateInternalVariable(
2771       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2772 }
2773 
2774 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2775                                                 const VarDecl *VD,
2776                                                 Address VDAddr,
2777                                                 SourceLocation Loc) {
2778   if (CGM.getLangOpts().OpenMPUseTLS &&
2779       CGM.getContext().getTargetInfo().isTLSSupported())
2780     return VDAddr;
2781 
2782   llvm::Type *VarTy = VDAddr.getElementType();
2783   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2784                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2785                                                        CGM.Int8PtrTy),
2786                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2787                          getOrCreateThreadPrivateCache(VD)};
2788   return Address(CGF.EmitRuntimeCall(
2789       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2790                  VDAddr.getAlignment());
2791 }
2792 
2793 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2794     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2795     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2796   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2797   // library.
2798   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2799   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2800                       OMPLoc);
2801   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2802   // to register constructor/destructor for variable.
2803   llvm::Value *Args[] = {
2804       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2805       Ctor, CopyCtor, Dtor};
2806   CGF.EmitRuntimeCall(
2807       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2808 }
2809 
/// Emit the constructor/destructor helpers for the threadprivate variable
/// \p VD and register them with the OpenMP runtime.
///
/// Nothing is emitted when native TLS is used, when \p VD has no definition,
/// or when the definition has already been processed (tracked through
/// ThreadPrivateWithDefinition by mangled name).
///
/// \param VD Threadprivate variable; its definition is looked up here.
/// \param VDAddr Address of the original variable.
/// \param Loc Location used for the generated functions and runtime calls.
/// \param PerformInit If true (and compiling C++), a function re-emitting the
/// initializer is generated.
/// \param CGF If non-null, the registration calls are emitted into this
/// function; if null, a standalone init function is created instead.
/// \returns The standalone init function when one was created (only possible
/// when \p CGF is null); nullptr in every other case.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With native TLS no runtime registration is needed.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Process each definition only once; insert() reports whether the mangled
  // name was newly added.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    // NOTE(review): Init is dereferenced below without a null check; this
    // presumably relies on callers passing PerformInit only when the variable
    // has an initializer -- confirm against the callers.
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single unnamed 'void *' parameter: the destination to initialize.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer and view it as a pointer to the
      // variable's memory type, keeping the original variable's alignment.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The generated function returns its argument: reload the parameter and
      // store it into the return slot.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single unnamed 'void *' parameter: the object to destroy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      // Use an empty debug location while the function is being started.
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    // Pointer type 'void *(*)(void *, void *)' used for the copy constructor
    // slot.
    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // A missing constructor is passed as a typed null 'void *(*)(void *)'.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    // A missing destructor is passed as a typed null 'void (*)(void *)'.
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    // Without a caller-provided function, wrap the registration in a
    // standalone nullary init function and return it.
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the caller's function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
2929 
2930 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2931                                                      llvm::GlobalVariable *Addr,
2932                                                      bool PerformInit) {
2933   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
2934       !CGM.getLangOpts().OpenMPIsDevice)
2935     return false;
2936   Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2937       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2938   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
2939       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2940        HasRequiresUnifiedSharedMemory))
2941     return CGM.getLangOpts().OpenMPIsDevice;
2942   VD = VD->getDefinition(CGM.getContext());
2943   if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
2944     return CGM.getLangOpts().OpenMPIsDevice;
2945 
2946   QualType ASTTy = VD->getType();
2947 
2948   SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
2949   // Produce the unique prefix to identify the new target regions. We use
2950   // the source location of the variable declaration which we know to not
2951   // conflict with any target region.
2952   unsigned DeviceID;
2953   unsigned FileID;
2954   unsigned Line;
2955   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2956   SmallString<128> Buffer, Out;
2957   {
2958     llvm::raw_svector_ostream OS(Buffer);
2959     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2960        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2961   }
2962 
2963   const Expr *Init = VD->getAnyInitializer();
2964   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2965     llvm::Constant *Ctor;
2966     llvm::Constant *ID;
2967     if (CGM.getLangOpts().OpenMPIsDevice) {
2968       // Generate function that re-emits the declaration's initializer into
2969       // the threadprivate copy of the variable VD
2970       CodeGenFunction CtorCGF(CGM);
2971 
2972       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2973       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2974       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2975           FTy, Twine(Buffer, "_ctor"), FI, Loc);
2976       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2977       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2978                             FunctionArgList(), Loc, Loc);
2979       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2980       CtorCGF.EmitAnyExprToMem(Init,
2981                                Address(Addr, CGM.getContext().getDeclAlign(VD)),
2982                                Init->getType().getQualifiers(),
2983                                /*IsInitializer=*/true);
2984       CtorCGF.FinishFunction();
2985       Ctor = Fn;
2986       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2987       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
2988     } else {
2989       Ctor = new llvm::GlobalVariable(
2990           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2991           llvm::GlobalValue::PrivateLinkage,
2992           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2993       ID = Ctor;
2994     }
2995 
2996     // Register the information for the entry associated with the constructor.
2997     Out.clear();
2998     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2999         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
3000         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
3001   }
3002   if (VD->getType().isDestructedType() != QualType::DK_none) {
3003     llvm::Constant *Dtor;
3004     llvm::Constant *ID;
3005     if (CGM.getLangOpts().OpenMPIsDevice) {
3006       // Generate function that emits destructor call for the threadprivate
3007       // copy of the variable VD
3008       CodeGenFunction DtorCGF(CGM);
3009 
3010       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
3011       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3012       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
3013           FTy, Twine(Buffer, "_dtor"), FI, Loc);
3014       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
3015       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
3016                             FunctionArgList(), Loc, Loc);
3017       // Create a scope with an artificial location for the body of this
3018       // function.
3019       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
3020       DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
3021                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
3022                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
3023       DtorCGF.FinishFunction();
3024       Dtor = Fn;
3025       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
3026       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
3027     } else {
3028       Dtor = new llvm::GlobalVariable(
3029           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
3030           llvm::GlobalValue::PrivateLinkage,
3031           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
3032       ID = Dtor;
3033     }
3034     // Register the information for the entry associated with the destructor.
3035     Out.clear();
3036     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
3037         DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
3038         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
3039   }
3040   return CGM.getLangOpts().OpenMPIsDevice;
3041 }
3042 
3043 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
3044                                                           QualType VarType,
3045                                                           StringRef Name) {
3046   std::string Suffix = getName({"artificial", ""});
3047   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
3048   llvm::Value *GAddr =
3049       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
3050   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
3051       CGM.getTarget().isTLSSupported()) {
3052     cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
3053     return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
3054   }
3055   std::string CacheSuffix = getName({"cache", ""});
3056   llvm::Value *Args[] = {
3057       emitUpdateLocation(CGF, SourceLocation()),
3058       getThreadID(CGF, SourceLocation()),
3059       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
3060       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
3061                                 /*isSigned=*/false),
3062       getOrCreateInternalVariable(
3063           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
3064   return Address(
3065       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3066           CGF.EmitRuntimeCall(
3067               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
3068           VarLVType->getPointerTo(/*AddrSpace=*/0)),
3069       CGM.getContext().getTypeAlignInChars(VarType));
3070 }
3071 
3072 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
3073                                    const RegionCodeGenTy &ThenGen,
3074                                    const RegionCodeGenTy &ElseGen) {
3075   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
3076 
3077   // If the condition constant folds and can be elided, try to avoid emitting
3078   // the condition and the dead arm of the if/else.
3079   bool CondConstant;
3080   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
3081     if (CondConstant)
3082       ThenGen(CGF);
3083     else
3084       ElseGen(CGF);
3085     return;
3086   }
3087 
3088   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
3089   // emit the conditional branch.
3090   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
3091   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
3092   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
3093   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
3094 
3095   // Emit the 'then' code.
3096   CGF.EmitBlock(ThenBlock);
3097   ThenGen(CGF);
3098   CGF.EmitBranch(ContBlock);
3099   // Emit the 'else' code if present.
3100   // There is no need to emit line number for unconditional branch.
3101   (void)ApplyDebugLocation::CreateEmpty(CGF);
3102   CGF.EmitBlock(ElseBlock);
3103   ElseGen(CGF);
3104   // There is no need to emit line number for unconditional branch.
3105   (void)ApplyDebugLocation::CreateEmpty(CGF);
3106   CGF.EmitBranch(ContBlock);
3107   // Emit the continuation block for code after the if.
3108   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
3109 }
3110 
3111 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
3112                                        llvm::Function *OutlinedFn,
3113                                        ArrayRef<llvm::Value *> CapturedVars,
3114                                        const Expr *IfCond) {
3115   if (!CGF.HaveInsertPoint())
3116     return;
3117   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
3118   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
3119                                                      PrePostActionTy &) {
3120     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
3121     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
3122     llvm::Value *Args[] = {
3123         RTLoc,
3124         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
3125         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
3126     llvm::SmallVector<llvm::Value *, 16> RealArgs;
3127     RealArgs.append(std::begin(Args), std::end(Args));
3128     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
3129 
3130     llvm::FunctionCallee RTLFn =
3131         RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
3132     CGF.EmitRuntimeCall(RTLFn, RealArgs);
3133   };
3134   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
3135                                                           PrePostActionTy &) {
3136     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
3137     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
3138     // Build calls:
3139     // __kmpc_serialized_parallel(&Loc, GTid);
3140     llvm::Value *Args[] = {RTLoc, ThreadID};
3141     CGF.EmitRuntimeCall(
3142         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
3143 
3144     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
3145     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
3146     Address ZeroAddrBound =
3147         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
3148                                          /*Name=*/".bound.zero.addr");
3149     CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
3150     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
3151     // ThreadId for serialized parallels is 0.
3152     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
3153     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
3154     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
3155     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
3156 
3157     // __kmpc_end_serialized_parallel(&Loc, GTid);
3158     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
3159     CGF.EmitRuntimeCall(
3160         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
3161         EndArgs);
3162   };
3163   if (IfCond) {
3164     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
3165   } else {
3166     RegionCodeGenTy ThenRCG(ThenGen);
3167     ThenRCG(CGF);
3168   }
3169 }
3170 
3171 // If we're inside an (outlined) parallel region, use the region info's
3172 // thread-ID variable (it is passed in a first argument of the outlined function
3173 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3174 // regular serial code region, get thread ID by calling kmp_int32
3175 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3176 // return the address of that temp.
3177 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3178                                              SourceLocation Loc) {
3179   if (auto *OMPRegionInfo =
3180           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3181     if (OMPRegionInfo->getThreadIDVariable())
3182       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
3183 
3184   llvm::Value *ThreadID = getThreadID(CGF, Loc);
3185   QualType Int32Ty =
3186       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3187   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3188   CGF.EmitStoreOfScalar(ThreadID,
3189                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3190 
3191   return ThreadIDTemp;
3192 }
3193 
3194 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3195     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3196   SmallString<256> Buffer;
3197   llvm::raw_svector_ostream Out(Buffer);
3198   Out << Name;
3199   StringRef RuntimeName = Out.str();
3200   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3201   if (Elem.second) {
3202     assert(Elem.second->getType()->getPointerElementType() == Ty &&
3203            "OMP internal variable has different type than requested");
3204     return &*Elem.second;
3205   }
3206 
3207   return Elem.second = new llvm::GlobalVariable(
3208              CGM.getModule(), Ty, /*IsConstant*/ false,
3209              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3210              Elem.first(), /*InsertBefore=*/nullptr,
3211              llvm::GlobalValue::NotThreadLocal, AddressSpace);
3212 }
3213 
3214 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3215   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3216   std::string Name = getName({Prefix, "var"});
3217   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3218 }
3219 
3220 namespace {
3221 /// Common pre(post)-action for different OpenMP constructs.
3222 class CommonActionTy final : public PrePostActionTy {
3223   llvm::FunctionCallee EnterCallee;
3224   ArrayRef<llvm::Value *> EnterArgs;
3225   llvm::FunctionCallee ExitCallee;
3226   ArrayRef<llvm::Value *> ExitArgs;
3227   bool Conditional;
3228   llvm::BasicBlock *ContBlock = nullptr;
3229 
3230 public:
3231   CommonActionTy(llvm::FunctionCallee EnterCallee,
3232                  ArrayRef<llvm::Value *> EnterArgs,
3233                  llvm::FunctionCallee ExitCallee,
3234                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3235       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3236         ExitArgs(ExitArgs), Conditional(Conditional) {}
3237   void Enter(CodeGenFunction &CGF) override {
3238     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3239     if (Conditional) {
3240       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3241       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3242       ContBlock = CGF.createBasicBlock("omp_if.end");
3243       // Generate the branch (If-stmt)
3244       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3245       CGF.EmitBlock(ThenBlock);
3246     }
3247   }
3248   void Done(CodeGenFunction &CGF) {
3249     // Emit the rest of blocks/branches
3250     CGF.EmitBranch(ContBlock);
3251     CGF.EmitBlock(ContBlock, true);
3252   }
3253   void Exit(CodeGenFunction &CGF) override {
3254     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3255   }
3256 };
3257 } // anonymous namespace
3258 
3259 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3260                                          StringRef CriticalName,
3261                                          const RegionCodeGenTy &CriticalOpGen,
3262                                          SourceLocation Loc, const Expr *Hint) {
3263   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3264   // CriticalOpGen();
3265   // __kmpc_end_critical(ident_t *, gtid, Lock);
3266   // Prepare arguments and build a call to __kmpc_critical
3267   if (!CGF.HaveInsertPoint())
3268     return;
3269   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3270                          getCriticalRegionLock(CriticalName)};
3271   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3272                                                 std::end(Args));
3273   if (Hint) {
3274     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3275         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3276   }
3277   CommonActionTy Action(
3278       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3279                                  : OMPRTL__kmpc_critical),
3280       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3281   CriticalOpGen.setAction(Action);
3282   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3283 }
3284 
3285 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3286                                        const RegionCodeGenTy &MasterOpGen,
3287                                        SourceLocation Loc) {
3288   if (!CGF.HaveInsertPoint())
3289     return;
3290   // if(__kmpc_master(ident_t *, gtid)) {
3291   //   MasterOpGen();
3292   //   __kmpc_end_master(ident_t *, gtid);
3293   // }
3294   // Prepare arguments and build a call to __kmpc_master
3295   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3296   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3297                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3298                         /*Conditional=*/true);
3299   MasterOpGen.setAction(Action);
3300   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3301   Action.Done(CGF);
3302 }
3303 
3304 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3305                                         SourceLocation Loc) {
3306   if (!CGF.HaveInsertPoint())
3307     return;
3308   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3309   llvm::Value *Args[] = {
3310       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3311       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3312   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3313   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3314     Region->emitUntiedSwitch(CGF);
3315 }
3316 
3317 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3318                                           const RegionCodeGenTy &TaskgroupOpGen,
3319                                           SourceLocation Loc) {
3320   if (!CGF.HaveInsertPoint())
3321     return;
3322   // __kmpc_taskgroup(ident_t *, gtid);
3323   // TaskgroupOpGen();
3324   // __kmpc_end_taskgroup(ident_t *, gtid);
3325   // Prepare arguments and build a call to __kmpc_taskgroup
3326   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3327   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3328                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3329                         Args);
3330   TaskgroupOpGen.setAction(Action);
3331   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3332 }
3333 
3334 /// Given an array of pointers to variables, project the address of a
3335 /// given variable.
3336 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3337                                       unsigned Index, const VarDecl *Var) {
3338   // Pull out the pointer to the variable.
3339   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3340   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3341 
3342   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3343   Addr = CGF.Builder.CreateElementBitCast(
3344       Addr, CGF.ConvertTypeForMem(Var->getType()));
3345   return Addr;
3346 }
3347 
3348 static llvm::Value *emitCopyprivateCopyFunction(
3349     CodeGenModule &CGM, llvm::Type *ArgsType,
3350     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3351     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3352     SourceLocation Loc) {
3353   ASTContext &C = CGM.getContext();
3354   // void copy_func(void *LHSArg, void *RHSArg);
3355   FunctionArgList Args;
3356   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3357                            ImplicitParamDecl::Other);
3358   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3359                            ImplicitParamDecl::Other);
3360   Args.push_back(&LHSArg);
3361   Args.push_back(&RHSArg);
3362   const auto &CGFI =
3363       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3364   std::string Name =
3365       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3366   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3367                                     llvm::GlobalValue::InternalLinkage, Name,
3368                                     &CGM.getModule());
3369   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3370   Fn->setDoesNotRecurse();
3371   CodeGenFunction CGF(CGM);
3372   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3373   // Dest = (void*[n])(LHSArg);
3374   // Src = (void*[n])(RHSArg);
3375   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3376       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3377       ArgsType), CGF.getPointerAlign());
3378   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3379       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3380       ArgsType), CGF.getPointerAlign());
3381   // *(Type0*)Dst[0] = *(Type0*)Src[0];
3382   // *(Type1*)Dst[1] = *(Type1*)Src[1];
3383   // ...
3384   // *(Typen*)Dst[n] = *(Typen*)Src[n];
3385   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3386     const auto *DestVar =
3387         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3388     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3389 
3390     const auto *SrcVar =
3391         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3392     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3393 
3394     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3395     QualType Type = VD->getType();
3396     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3397   }
3398   CGF.FinishFunction();
3399   return Fn;
3400 }
3401 
3402 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
3403                                        const RegionCodeGenTy &SingleOpGen,
3404                                        SourceLocation Loc,
3405                                        ArrayRef<const Expr *> CopyprivateVars,
3406                                        ArrayRef<const Expr *> SrcExprs,
3407                                        ArrayRef<const Expr *> DstExprs,
3408                                        ArrayRef<const Expr *> AssignmentOps) {
3409   if (!CGF.HaveInsertPoint())
3410     return;
3411   assert(CopyprivateVars.size() == SrcExprs.size() &&
3412          CopyprivateVars.size() == DstExprs.size() &&
3413          CopyprivateVars.size() == AssignmentOps.size());
3414   ASTContext &C = CGM.getContext();
3415   // int32 did_it = 0;
3416   // if(__kmpc_single(ident_t *, gtid)) {
3417   //   SingleOpGen();
3418   //   __kmpc_end_single(ident_t *, gtid);
3419   //   did_it = 1;
3420   // }
3421   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3422   // <copy_func>, did_it);
3423 
3424   Address DidIt = Address::invalid();
3425   if (!CopyprivateVars.empty()) {
3426     // int32 did_it = 0;
3427     QualType KmpInt32Ty =
3428         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3429     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3430     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3431   }
3432   // Prepare arguments and build a call to __kmpc_single
3433   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3434   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3435                         createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
3436                         /*Conditional=*/true);
3437   SingleOpGen.setAction(Action);
3438   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
3439   if (DidIt.isValid()) {
3440     // did_it = 1;
3441     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3442   }
3443   Action.Done(CGF);
3444   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3445   // <copy_func>, did_it);
3446   if (DidIt.isValid()) {
3447     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3448     QualType CopyprivateArrayTy = C.getConstantArrayType(
3449         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
3450         /*IndexTypeQuals=*/0);
3451     // Create a list of all private variables for copyprivate.
3452     Address CopyprivateList =
3453         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3454     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
3455       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
3456       CGF.Builder.CreateStore(
3457           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3458               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
3459               CGF.VoidPtrTy),
3460           Elem);
3461     }
3462     // Build function that copies private values from single region to all other
3463     // threads in the corresponding parallel region.
3464     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
3465         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3466         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3467     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3468     Address CL =
3469       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3470                                                       CGF.VoidPtrTy);
3471     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
3472     llvm::Value *Args[] = {
3473         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3474         getThreadID(CGF, Loc),        // i32 <gtid>
3475         BufSize,                      // size_t <buf_size>
3476         CL.getPointer(),              // void *<copyprivate list>
3477         CpyFn,                        // void (*) (void *, void *) <copy_func>
3478         DidItVal                      // i32 did_it
3479     };
3480     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
3481   }
3482 }
3483 
3484 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3485                                         const RegionCodeGenTy &OrderedOpGen,
3486                                         SourceLocation Loc, bool IsThreads) {
3487   if (!CGF.HaveInsertPoint())
3488     return;
3489   // __kmpc_ordered(ident_t *, gtid);
3490   // OrderedOpGen();
3491   // __kmpc_end_ordered(ident_t *, gtid);
3492   // Prepare arguments and build a call to __kmpc_ordered
3493   if (IsThreads) {
3494     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3495     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3496                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3497                           Args);
3498     OrderedOpGen.setAction(Action);
3499     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3500     return;
3501   }
3502   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3503 }
3504 
3505 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3506   unsigned Flags;
3507   if (Kind == OMPD_for)
3508     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3509   else if (Kind == OMPD_sections)
3510     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3511   else if (Kind == OMPD_single)
3512     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3513   else if (Kind == OMPD_barrier)
3514     Flags = OMP_IDENT_BARRIER_EXPL;
3515   else
3516     Flags = OMP_IDENT_BARRIER_IMPL;
3517   return Flags;
3518 }
3519 
3520 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3521     CodeGenFunction &CGF, const OMPLoopDirective &S,
3522     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3523   // Check if the loop directive is actually a doacross loop directive. In this
3524   // case choose static, 1 schedule.
3525   if (llvm::any_of(
3526           S.getClausesOfKind<OMPOrderedClause>(),
3527           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3528     ScheduleKind = OMPC_SCHEDULE_static;
3529     // Chunk size is 1 in this case.
3530     llvm::APInt ChunkSize(32, 1);
3531     ChunkExpr = IntegerLiteral::Create(
3532         CGF.getContext(), ChunkSize,
3533         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3534         SourceLocation());
3535   }
3536 }
3537 
3538 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3539                                       OpenMPDirectiveKind Kind, bool EmitChecks,
3540                                       bool ForceSimpleCall) {
3541   // Check if we should use the OMPBuilder
3542   auto *OMPRegionInfo =
3543       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
3544   llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3545   if (OMPBuilder) {
3546     CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
3547         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
3548     return;
3549   }
3550 
3551   if (!CGF.HaveInsertPoint())
3552     return;
3553   // Build call __kmpc_cancel_barrier(loc, thread_id);
3554   // Build call __kmpc_barrier(loc, thread_id);
3555   unsigned Flags = getDefaultFlagsForBarriers(Kind);
3556   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3557   // thread_id);
3558   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3559                          getThreadID(CGF, Loc)};
3560   if (OMPRegionInfo) {
3561     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3562       llvm::Value *Result = CGF.EmitRuntimeCall(
3563           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3564       if (EmitChecks) {
3565         // if (__kmpc_cancel_barrier()) {
3566         //   exit from construct;
3567         // }
3568         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3569         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3570         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3571         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3572         CGF.EmitBlock(ExitBB);
3573         //   exit from construct;
3574         CodeGenFunction::JumpDest CancelDestination =
3575             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3576         CGF.EmitBranchThroughCleanup(CancelDestination);
3577         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3578       }
3579       return;
3580     }
3581   }
3582   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3583 }
3584 
3585 /// Map the OpenMP loop schedule to the runtime enumeration.
3586 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3587                                           bool Chunked, bool Ordered) {
3588   switch (ScheduleKind) {
3589   case OMPC_SCHEDULE_static:
3590     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3591                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3592   case OMPC_SCHEDULE_dynamic:
3593     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3594   case OMPC_SCHEDULE_guided:
3595     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3596   case OMPC_SCHEDULE_runtime:
3597     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3598   case OMPC_SCHEDULE_auto:
3599     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3600   case OMPC_SCHEDULE_unknown:
3601     assert(!Chunked && "chunk was specified but schedule kind not known");
3602     return Ordered ? OMP_ord_static : OMP_sch_static;
3603   }
3604   llvm_unreachable("Unexpected runtime schedule");
3605 }
3606 
3607 /// Map the OpenMP distribute schedule to the runtime enumeration.
3608 static OpenMPSchedType
3609 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3610   // only static is allowed for dist_schedule
3611   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3612 }
3613 
3614 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3615                                          bool Chunked) const {
3616   OpenMPSchedType Schedule =
3617       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3618   return Schedule == OMP_sch_static;
3619 }
3620 
3621 bool CGOpenMPRuntime::isStaticNonchunked(
3622     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3623   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3624   return Schedule == OMP_dist_sch_static;
3625 }
3626 
3627 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3628                                       bool Chunked) const {
3629   OpenMPSchedType Schedule =
3630       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3631   return Schedule == OMP_sch_static_chunked;
3632 }
3633 
3634 bool CGOpenMPRuntime::isStaticChunked(
3635     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3636   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3637   return Schedule == OMP_dist_sch_static_chunked;
3638 }
3639 
3640 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3641   OpenMPSchedType Schedule =
3642       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3643   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3644   return Schedule != OMP_sch_static;
3645 }
3646 
3647 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
3648                                   OpenMPScheduleClauseModifier M1,
3649                                   OpenMPScheduleClauseModifier M2) {
3650   int Modifier = 0;
3651   switch (M1) {
3652   case OMPC_SCHEDULE_MODIFIER_monotonic:
3653     Modifier = OMP_sch_modifier_monotonic;
3654     break;
3655   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3656     Modifier = OMP_sch_modifier_nonmonotonic;
3657     break;
3658   case OMPC_SCHEDULE_MODIFIER_simd:
3659     if (Schedule == OMP_sch_static_chunked)
3660       Schedule = OMP_sch_static_balanced_chunked;
3661     break;
3662   case OMPC_SCHEDULE_MODIFIER_last:
3663   case OMPC_SCHEDULE_MODIFIER_unknown:
3664     break;
3665   }
3666   switch (M2) {
3667   case OMPC_SCHEDULE_MODIFIER_monotonic:
3668     Modifier = OMP_sch_modifier_monotonic;
3669     break;
3670   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3671     Modifier = OMP_sch_modifier_nonmonotonic;
3672     break;
3673   case OMPC_SCHEDULE_MODIFIER_simd:
3674     if (Schedule == OMP_sch_static_chunked)
3675       Schedule = OMP_sch_static_balanced_chunked;
3676     break;
3677   case OMPC_SCHEDULE_MODIFIER_last:
3678   case OMPC_SCHEDULE_MODIFIER_unknown:
3679     break;
3680   }
3681   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
3682   // If the static schedule kind is specified or if the ordered clause is
3683   // specified, and if the nonmonotonic modifier is not specified, the effect is
3684   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
3685   // modifier is specified, the effect is as if the nonmonotonic modifier is
3686   // specified.
3687   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
3688     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
3689           Schedule == OMP_sch_static_balanced_chunked ||
3690           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
3691           Schedule == OMP_dist_sch_static_chunked ||
3692           Schedule == OMP_dist_sch_static))
3693       Modifier = OMP_sch_modifier_nonmonotonic;
3694   }
3695   return Schedule | Modifier;
3696 }
3697 
3698 void CGOpenMPRuntime::emitForDispatchInit(
3699     CodeGenFunction &CGF, SourceLocation Loc,
3700     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3701     bool Ordered, const DispatchRTInput &DispatchValues) {
3702   if (!CGF.HaveInsertPoint())
3703     return;
3704   OpenMPSchedType Schedule = getRuntimeSchedule(
3705       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3706   assert(Ordered ||
3707          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3708           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3709           Schedule != OMP_sch_static_balanced_chunked));
3710   // Call __kmpc_dispatch_init(
3711   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3712   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
3713   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
3714 
3715   // If the Chunk was not specified in the clause - use default value 1.
3716   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3717                                             : CGF.Builder.getIntN(IVSize, 1);
3718   llvm::Value *Args[] = {
3719       emitUpdateLocation(CGF, Loc),
3720       getThreadID(CGF, Loc),
3721       CGF.Builder.getInt32(addMonoNonMonoModifier(
3722           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3723       DispatchValues.LB,                                     // Lower
3724       DispatchValues.UB,                                     // Upper
3725       CGF.Builder.getIntN(IVSize, 1),                        // Stride
3726       Chunk                                                  // Chunk
3727   };
3728   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3729 }
3730 
3731 static void emitForStaticInitCall(
3732     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3733     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
3734     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3735     const CGOpenMPRuntime::StaticRTInput &Values) {
3736   if (!CGF.HaveInsertPoint())
3737     return;
3738 
3739   assert(!Values.Ordered);
3740   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3741          Schedule == OMP_sch_static_balanced_chunked ||
3742          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3743          Schedule == OMP_dist_sch_static ||
3744          Schedule == OMP_dist_sch_static_chunked);
3745 
3746   // Call __kmpc_for_static_init(
3747   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3748   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3749   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3750   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
3751   llvm::Value *Chunk = Values.Chunk;
3752   if (Chunk == nullptr) {
3753     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3754             Schedule == OMP_dist_sch_static) &&
3755            "expected static non-chunked schedule");
3756     // If the Chunk was not specified in the clause - use default value 1.
3757     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3758   } else {
3759     assert((Schedule == OMP_sch_static_chunked ||
3760             Schedule == OMP_sch_static_balanced_chunked ||
3761             Schedule == OMP_ord_static_chunked ||
3762             Schedule == OMP_dist_sch_static_chunked) &&
3763            "expected static chunked schedule");
3764   }
3765   llvm::Value *Args[] = {
3766       UpdateLocation,
3767       ThreadId,
3768       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
3769                                                   M2)), // Schedule type
3770       Values.IL.getPointer(),                           // &isLastIter
3771       Values.LB.getPointer(),                           // &LB
3772       Values.UB.getPointer(),                           // &UB
3773       Values.ST.getPointer(),                           // &Stride
3774       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
3775       Chunk                                             // Chunk
3776   };
3777   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3778 }
3779 
3780 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3781                                         SourceLocation Loc,
3782                                         OpenMPDirectiveKind DKind,
3783                                         const OpenMPScheduleTy &ScheduleKind,
3784                                         const StaticRTInput &Values) {
3785   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3786       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3787   assert(isOpenMPWorksharingDirective(DKind) &&
3788          "Expected loop-based or sections-based directive.");
3789   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3790                                              isOpenMPLoopDirective(DKind)
3791                                                  ? OMP_IDENT_WORK_LOOP
3792                                                  : OMP_IDENT_WORK_SECTIONS);
3793   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3794   llvm::FunctionCallee StaticInitFunction =
3795       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3796   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3797                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3798 }
3799 
3800 void CGOpenMPRuntime::emitDistributeStaticInit(
3801     CodeGenFunction &CGF, SourceLocation Loc,
3802     OpenMPDistScheduleClauseKind SchedKind,
3803     const CGOpenMPRuntime::StaticRTInput &Values) {
3804   OpenMPSchedType ScheduleNum =
3805       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3806   llvm::Value *UpdatedLocation =
3807       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3808   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3809   llvm::FunctionCallee StaticInitFunction =
3810       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3811   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3812                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3813                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3814 }
3815 
3816 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3817                                           SourceLocation Loc,
3818                                           OpenMPDirectiveKind DKind) {
3819   if (!CGF.HaveInsertPoint())
3820     return;
3821   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3822   llvm::Value *Args[] = {
3823       emitUpdateLocation(CGF, Loc,
3824                          isOpenMPDistributeDirective(DKind)
3825                              ? OMP_IDENT_WORK_DISTRIBUTE
3826                              : isOpenMPLoopDirective(DKind)
3827                                    ? OMP_IDENT_WORK_LOOP
3828                                    : OMP_IDENT_WORK_SECTIONS),
3829       getThreadID(CGF, Loc)};
3830   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3831                       Args);
3832 }
3833 
3834 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3835                                                  SourceLocation Loc,
3836                                                  unsigned IVSize,
3837                                                  bool IVSigned) {
3838   if (!CGF.HaveInsertPoint())
3839     return;
3840   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3841   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3842   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3843 }
3844 
3845 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3846                                           SourceLocation Loc, unsigned IVSize,
3847                                           bool IVSigned, Address IL,
3848                                           Address LB, Address UB,
3849                                           Address ST) {
3850   // Call __kmpc_dispatch_next(
3851   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3852   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3853   //          kmp_int[32|64] *p_stride);
3854   llvm::Value *Args[] = {
3855       emitUpdateLocation(CGF, Loc),
3856       getThreadID(CGF, Loc),
3857       IL.getPointer(), // &isLastIter
3858       LB.getPointer(), // &Lower
3859       UB.getPointer(), // &Upper
3860       ST.getPointer()  // &Stride
3861   };
3862   llvm::Value *Call =
3863       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3864   return CGF.EmitScalarConversion(
3865       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3866       CGF.getContext().BoolTy, Loc);
3867 }
3868 
3869 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3870                                            llvm::Value *NumThreads,
3871                                            SourceLocation Loc) {
3872   if (!CGF.HaveInsertPoint())
3873     return;
3874   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3875   llvm::Value *Args[] = {
3876       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3877       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3878   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3879                       Args);
3880 }
3881 
3882 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3883                                          ProcBindKind ProcBind,
3884                                          SourceLocation Loc) {
3885   if (!CGF.HaveInsertPoint())
3886     return;
3887   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
3888   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3889   llvm::Value *Args[] = {
3890       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3891       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
3892   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3893 }
3894 
3895 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3896                                 SourceLocation Loc) {
3897   if (!CGF.HaveInsertPoint())
3898     return;
3899   // Build call void __kmpc_flush(ident_t *loc)
3900   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3901                       emitUpdateLocation(CGF, Loc));
3902 }
3903 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// The enumerators have no explicit values, so they number the fields
/// consecutively from 0 in declaration order.
/// NOTE(review): presumably this order has to match the field order of the
/// kmp_task_t record built elsewhere in this file - confirm before adding,
/// removing or reordering enumerators.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3929 
3930 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3931   return OffloadEntriesTargetRegion.empty() &&
3932          OffloadEntriesDeviceGlobalVar.empty();
3933 }
3934 
3935 /// Initialize target region entry.
3936 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3937     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3938                                     StringRef ParentName, unsigned LineNum,
3939                                     unsigned Order) {
3940   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3941                                              "only required for the device "
3942                                              "code generation.");
3943   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3944       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3945                                    OMPTargetRegionEntryTargetRegion);
3946   ++OffloadingEntriesNum;
3947 }
3948 
3949 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3950     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3951                                   StringRef ParentName, unsigned LineNum,
3952                                   llvm::Constant *Addr, llvm::Constant *ID,
3953                                   OMPTargetRegionEntryKind Flags) {
3954   // If we are emitting code for a target, the entry is already initialized,
3955   // only has to be registered.
3956   if (CGM.getLangOpts().OpenMPIsDevice) {
3957     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3958       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3959           DiagnosticsEngine::Error,
3960           "Unable to find target region on line '%0' in the device code.");
3961       CGM.getDiags().Report(DiagID) << LineNum;
3962       return;
3963     }
3964     auto &Entry =
3965         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3966     assert(Entry.isValid() && "Entry not initialized!");
3967     Entry.setAddress(Addr);
3968     Entry.setID(ID);
3969     Entry.setFlags(Flags);
3970   } else {
3971     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3972     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3973     ++OffloadingEntriesNum;
3974   }
3975 }
3976 
3977 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3978     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3979     unsigned LineNum) const {
3980   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3981   if (PerDevice == OffloadEntriesTargetRegion.end())
3982     return false;
3983   auto PerFile = PerDevice->second.find(FileID);
3984   if (PerFile == PerDevice->second.end())
3985     return false;
3986   auto PerParentName = PerFile->second.find(ParentName);
3987   if (PerParentName == PerFile->second.end())
3988     return false;
3989   auto PerLine = PerParentName->second.find(LineNum);
3990   if (PerLine == PerParentName->second.end())
3991     return false;
3992   // Fail if this entry is already registered.
3993   if (PerLine->second.getAddress() || PerLine->second.getID())
3994     return false;
3995   return true;
3996 }
3997 
3998 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3999     const OffloadTargetRegionEntryInfoActTy &Action) {
4000   // Scan all target region entries and perform the provided action.
4001   for (const auto &D : OffloadEntriesTargetRegion)
4002     for (const auto &F : D.second)
4003       for (const auto &P : F.second)
4004         for (const auto &L : P.second)
4005           Action(D.first, F.first, P.first(), L.first, L.second);
4006 }
4007 
4008 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4009     initializeDeviceGlobalVarEntryInfo(StringRef Name,
4010                                        OMPTargetGlobalVarEntryKind Flags,
4011                                        unsigned Order) {
4012   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
4013                                              "only required for the device "
4014                                              "code generation.");
4015   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
4016   ++OffloadingEntriesNum;
4017 }
4018 
4019 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4020     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
4021                                      CharUnits VarSize,
4022                                      OMPTargetGlobalVarEntryKind Flags,
4023                                      llvm::GlobalValue::LinkageTypes Linkage) {
4024   if (CGM.getLangOpts().OpenMPIsDevice) {
4025     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
4026     assert(Entry.isValid() && Entry.getFlags() == Flags &&
4027            "Entry not initialized!");
4028     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
4029            "Resetting with the new address.");
4030     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
4031       if (Entry.getVarSize().isZero()) {
4032         Entry.setVarSize(VarSize);
4033         Entry.setLinkage(Linkage);
4034       }
4035       return;
4036     }
4037     Entry.setVarSize(VarSize);
4038     Entry.setLinkage(Linkage);
4039     Entry.setAddress(Addr);
4040   } else {
4041     if (hasDeviceGlobalVarEntryInfo(VarName)) {
4042       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
4043       assert(Entry.isValid() && Entry.getFlags() == Flags &&
4044              "Entry not initialized!");
4045       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
4046              "Resetting with the new address.");
4047       if (Entry.getVarSize().isZero()) {
4048         Entry.setVarSize(VarSize);
4049         Entry.setLinkage(Linkage);
4050       }
4051       return;
4052     }
4053     OffloadEntriesDeviceGlobalVar.try_emplace(
4054         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
4055     ++OffloadingEntriesNum;
4056   }
4057 }
4058 
4059 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4060     actOnDeviceGlobalVarEntriesInfo(
4061         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
4062   // Scan all target region entries and perform the provided action.
4063   for (const auto &E : OffloadEntriesDeviceGlobalVar)
4064     Action(E.getKey(), E.getValue());
4065 }
4066 
4067 void CGOpenMPRuntime::createOffloadEntry(
4068     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4069     llvm::GlobalValue::LinkageTypes Linkage) {
4070   StringRef Name = Addr->getName();
4071   llvm::Module &M = CGM.getModule();
4072   llvm::LLVMContext &C = M.getContext();
4073 
4074   // Create constant string with the name.
4075   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4076 
4077   std::string StringName = getName({"omp_offloading", "entry_name"});
4078   auto *Str = new llvm::GlobalVariable(
4079       M, StrPtrInit->getType(), /*isConstant=*/true,
4080       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4081   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4082 
4083   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4084                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4085                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4086                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4087                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4088   std::string EntryName = getName({"omp_offloading", "entry", ""});
4089   llvm::GlobalVariable *Entry = createGlobalStruct(
4090       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4091       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4092 
4093   // The entry has to be created in the section the linker expects it to be.
4094   Entry->setSection("omp_offloading_entries");
4095 }
4096 
/// Emit the "omp_offload.info" named metadata and the offload entry globals
/// for every entry known to OffloadEntriesInfoManager.
///
/// The function works in two phases: first each target region and device
/// global variable entry is turned into a metadata node and remembered at the
/// index given by its order; then the remembered entries are visited in that
/// order and an offload entry is created (or a problem is diagnosed) for each.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Metadata is generated for target regions and for device global variables.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // One slot per entry, indexed by the entry's order: the entry itself, a
  // source location used for diagnostics, and a name (parent function or
  // mangled variable name).
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  // Parent function names, indexed by entry order; only filled for target
  // region entries.
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Find the file with the matching device/file ID among the files known
        // to the source manager and translate the line (column 1) into a
        // source location; Loc stays default-constructed if no file matches.
        // Note: the loop-local end iterator `E` shadows the entry parameter
        // `E` inside the loop.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        // No source location is recorded for global variables.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Second phase: visit the entries in creation order and emit the actual
  // offload entry globals.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      // Target regions are emitted with a size of zero.
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // No entry is emitted on the device when unified shared memory is
        // required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // The address is expected to be unset on the device and set on the
        // host.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        // Link entries are only emitted on the host.
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        // With assertions enabled a missing host address is already caught by
        // the assert above; this diagnoses it in builds without assertions.
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For variables the address serves as both the ID and the address.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
4270 
4271 /// Loads all the offload entries information from the host IR
4272 /// metadata.
4273 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4274   // If we are in target mode, load the metadata from the host IR. This code has
4275   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
4276 
4277   if (!CGM.getLangOpts().OpenMPIsDevice)
4278     return;
4279 
4280   if (CGM.getLangOpts().OMPHostIRFile.empty())
4281     return;
4282 
4283   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4284   if (auto EC = Buf.getError()) {
4285     CGM.getDiags().Report(diag::err_cannot_open_file)
4286         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4287     return;
4288   }
4289 
4290   llvm::LLVMContext C;
4291   auto ME = expectedToErrorOrAndEmitErrors(
4292       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4293 
4294   if (auto EC = ME.getError()) {
4295     unsigned DiagID = CGM.getDiags().getCustomDiagID(
4296         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4297     CGM.getDiags().Report(DiagID)
4298         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4299     return;
4300   }
4301 
4302   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4303   if (!MD)
4304     return;
4305 
4306   for (llvm::MDNode *MN : MD->operands()) {
4307     auto &&GetMDInt = [MN](unsigned Idx) {
4308       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4309       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4310     };
4311 
4312     auto &&GetMDString = [MN](unsigned Idx) {
4313       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4314       return V->getString();
4315     };
4316 
4317     switch (GetMDInt(0)) {
4318     default:
4319       llvm_unreachable("Unexpected metadata!");
4320       break;
4321     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4322         OffloadingEntryInfoTargetRegion:
4323       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4324           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4325           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4326           /*Order=*/GetMDInt(5));
4327       break;
4328     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4329         OffloadingEntryInfoDeviceGlobalVar:
4330       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4331           /*MangledName=*/GetMDString(1),
4332           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4333               /*Flags=*/GetMDInt(2)),
4334           /*Order=*/GetMDInt(3));
4335       break;
4336     }
4337   }
4338 }
4339 
4340 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4341   if (!KmpRoutineEntryPtrTy) {
4342     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4343     ASTContext &C = CGM.getContext();
4344     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4345     FunctionProtoType::ExtProtoInfo EPI;
4346     KmpRoutineEntryPtrQTy = C.getPointerType(
4347         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4348     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4349   }
4350 }
4351 
4352 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4353   // Make sure the type of the entry is already created. This is the type we
4354   // have to create:
4355   // struct __tgt_offload_entry{
4356   //   void      *addr;       // Pointer to the offload entry info.
4357   //                          // (function or global)
4358   //   char      *name;       // Name of the function or global.
4359   //   size_t     size;       // Size of the entry info (0 if it a function).
4360   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4361   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4362   // };
4363   if (TgtOffloadEntryQTy.isNull()) {
4364     ASTContext &C = CGM.getContext();
4365     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4366     RD->startDefinition();
4367     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4368     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4369     addFieldToRecordDecl(C, RD, C.getSizeType());
4370     addFieldToRecordDecl(
4371         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4372     addFieldToRecordDecl(
4373         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4374     RD->completeDefinition();
4375     RD->addAttr(PackedAttr::CreateImplicit(C));
4376     TgtOffloadEntryQTy = C.getRecordType(RD);
4377   }
4378   return TgtOffloadEntryQTy;
4379 }
4380 
4381 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4382   // These are the types we need to build:
4383   // struct __tgt_device_image{
4384   // void   *ImageStart;       // Pointer to the target code start.
4385   // void   *ImageEnd;         // Pointer to the target code end.
4386   // // We also add the host entries to the device image, as it may be useful
4387   // // for the target runtime to have access to that information.
4388   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
4389   //                                       // the entries.
4390   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4391   //                                       // entries (non inclusive).
4392   // };
4393   if (TgtDeviceImageQTy.isNull()) {
4394     ASTContext &C = CGM.getContext();
4395     RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4396     RD->startDefinition();
4397     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4398     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4399     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4400     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4401     RD->completeDefinition();
4402     TgtDeviceImageQTy = C.getRecordType(RD);
4403   }
4404   return TgtDeviceImageQTy;
4405 }
4406 
4407 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4408   // struct __tgt_bin_desc{
4409   //   int32_t              NumDevices;      // Number of devices supported.
4410   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
4411   //                                         // (one per device).
4412   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
4413   //                                         // entries.
4414   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4415   //                                         // entries (non inclusive).
4416   // };
4417   if (TgtBinaryDescriptorQTy.isNull()) {
4418     ASTContext &C = CGM.getContext();
4419     RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4420     RD->startDefinition();
4421     addFieldToRecordDecl(
4422         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4423     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
4424     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4425     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4426     RD->completeDefinition();
4427     TgtBinaryDescriptorQTy = C.getRecordType(RD);
4428   }
4429   return TgtBinaryDescriptorQTy;
4430 }
4431 
4432 namespace {
4433 struct PrivateHelpersTy {
4434   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4435                    const VarDecl *PrivateElemInit)
4436       : Original(Original), PrivateCopy(PrivateCopy),
4437         PrivateElemInit(PrivateElemInit) {}
4438   const VarDecl *Original;
4439   const VarDecl *PrivateCopy;
4440   const VarDecl *PrivateElemInit;
4441 };
4442 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4443 } // anonymous namespace
4444 
4445 static RecordDecl *
4446 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4447   if (!Privates.empty()) {
4448     ASTContext &C = CGM.getContext();
4449     // Build struct .kmp_privates_t. {
4450     //         /*  private vars  */
4451     //       };
4452     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4453     RD->startDefinition();
4454     for (const auto &Pair : Privates) {
4455       const VarDecl *VD = Pair.second.Original;
4456       QualType Type = VD->getType().getNonReferenceType();
4457       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4458       if (VD->hasAttrs()) {
4459         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4460              E(VD->getAttrs().end());
4461              I != E; ++I)
4462           FD->addAttr(*I);
4463       }
4464     }
4465     RD->completeDefinition();
4466     return RD;
4467   }
4468   return nullptr;
4469 }
4470 
4471 static RecordDecl *
4472 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4473                          QualType KmpInt32Ty,
4474                          QualType KmpRoutineEntryPointerQTy) {
4475   ASTContext &C = CGM.getContext();
4476   // Build struct kmp_task_t {
4477   //         void *              shareds;
4478   //         kmp_routine_entry_t routine;
4479   //         kmp_int32           part_id;
4480   //         kmp_cmplrdata_t data1;
4481   //         kmp_cmplrdata_t data2;
4482   // For taskloops additional fields:
4483   //         kmp_uint64          lb;
4484   //         kmp_uint64          ub;
4485   //         kmp_int64           st;
4486   //         kmp_int32           liter;
4487   //         void *              reductions;
4488   //       };
4489   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4490   UD->startDefinition();
4491   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4492   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4493   UD->completeDefinition();
4494   QualType KmpCmplrdataTy = C.getRecordType(UD);
4495   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4496   RD->startDefinition();
4497   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4498   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4499   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4500   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4501   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4502   if (isOpenMPTaskLoopDirective(Kind)) {
4503     QualType KmpUInt64Ty =
4504         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4505     QualType KmpInt64Ty =
4506         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4507     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4508     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4509     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4510     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4511     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4512   }
4513   RD->completeDefinition();
4514   return RD;
4515 }
4516 
4517 static RecordDecl *
4518 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4519                                      ArrayRef<PrivateDataTy> Privates) {
4520   ASTContext &C = CGM.getContext();
4521   // Build struct kmp_task_t_with_privates {
4522   //         kmp_task_t task_data;
4523   //         .kmp_privates_t. privates;
4524   //       };
4525   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4526   RD->startDefinition();
4527   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4528   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4529     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4530   RD->completeDefinition();
4531   return RD;
4532 }
4533 
/// Emit a proxy function which accepts a pointer to the task structure
/// (kmp_task_t followed by the privates) as the second argument, forwards to
/// \p TaskFunction and returns 0.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t_with_privates *tt) {
///   TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
///                task_privates_map, tt,
///                // For taskloops only:
///                tt->task_data.lb, tt->task_data.ub, tt->task_data.st,
///                tt->task_data.liter, tt->task_data.reductions,
///                // Always last:
///                tt->task_data.shareds);
///   return 0;
/// }
/// \endcode
/// If the task structure has no privates field, a null pointer is passed in
/// place of &tt->privates.
///
/// \param Kind Directive kind; taskloop directives get the extra arguments.
/// \param KmpTaskTWithPrivatesPtrQTy Pointer type of the second parameter.
/// \param KmpTaskTWithPrivatesQTy Record type the second parameter points to.
/// \param KmpTaskTQTy Record type of the first field of that record.
/// \param SharedsPtrTy Type the loaded shareds pointer is cast to.
/// \param TaskFunction Outlined function that is called by the proxy.
/// \param TaskPrivatesMap Value passed through as the fourth call argument.
/// \returns the newly created internal function.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Declare the two parameters: the global thread id and the restrict-
  // qualified pointer to the task structure.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  // Create the internal, non-recursive function and start emitting its body.
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
  // task_privates_map, tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.reductions,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase: the whole task structure; Base: its first field (the kmp_task_t).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // The shareds pointer is loaded and cast to the expected pointer type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates are the optional second field of the task structure; pass
  // its address as void*, or null if the field does not exist.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to all directive kinds; the last one is the task
  // structure itself as void*.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally pass lower bound, upper bound, stride, last
  // iteration flag and reductions, each loaded from the kmp_task_t.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  // The shareds pointer is always the last argument.
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4648 
4649 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4650                                             SourceLocation Loc,
4651                                             QualType KmpInt32Ty,
4652                                             QualType KmpTaskTWithPrivatesPtrQTy,
4653                                             QualType KmpTaskTWithPrivatesQTy) {
4654   ASTContext &C = CGM.getContext();
4655   FunctionArgList Args;
4656   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4657                             ImplicitParamDecl::Other);
4658   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4659                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4660                                 ImplicitParamDecl::Other);
4661   Args.push_back(&GtidArg);
4662   Args.push_back(&TaskTypeArg);
4663   const auto &DestructorFnInfo =
4664       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4665   llvm::FunctionType *DestructorFnTy =
4666       CGM.getTypes().GetFunctionType(DestructorFnInfo);
4667   std::string Name =
4668       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4669   auto *DestructorFn =
4670       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4671                              Name, &CGM.getModule());
4672   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4673                                     DestructorFnInfo);
4674   DestructorFn->setDoesNotRecurse();
4675   CodeGenFunction CGF(CGM);
4676   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4677                     Args, Loc, Loc);
4678 
4679   LValue Base = CGF.EmitLoadOfPointerLValue(
4680       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4681       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4682   const auto *KmpTaskTWithPrivatesQTyRD =
4683       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4684   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4685   Base = CGF.EmitLValueForField(Base, *FI);
4686   for (const auto *Field :
4687        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4688     if (QualType::DestructionKind DtorKind =
4689             Field->getType().isDestructedType()) {
4690       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4691       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
4692     }
4693   }
4694   CGF.FinishFunction();
4695   return DestructorFn;
4696 }
4697 
4698 /// Emit a privates mapping function for correct handling of private and
4699 /// firstprivate variables.
4700 /// \code
4701 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4702 /// **noalias priv1,...,  <tyn> **noalias privn) {
4703 ///   *priv1 = &.privates.priv1;
4704 ///   ...;
4705 ///   *privn = &.privates.privn;
4706 /// }
4707 /// \endcode
4708 static llvm::Value *
4709 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4710                                ArrayRef<const Expr *> PrivateVars,
4711                                ArrayRef<const Expr *> FirstprivateVars,
4712                                ArrayRef<const Expr *> LastprivateVars,
4713                                QualType PrivatesQTy,
4714                                ArrayRef<PrivateDataTy> Privates) {
4715   ASTContext &C = CGM.getContext();
4716   FunctionArgList Args;
4717   ImplicitParamDecl TaskPrivatesArg(
4718       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4719       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4720       ImplicitParamDecl::Other);
4721   Args.push_back(&TaskPrivatesArg);
4722   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4723   unsigned Counter = 1;
4724   for (const Expr *E : PrivateVars) {
4725     Args.push_back(ImplicitParamDecl::Create(
4726         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4727         C.getPointerType(C.getPointerType(E->getType()))
4728             .withConst()
4729             .withRestrict(),
4730         ImplicitParamDecl::Other));
4731     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4732     PrivateVarsPos[VD] = Counter;
4733     ++Counter;
4734   }
4735   for (const Expr *E : FirstprivateVars) {
4736     Args.push_back(ImplicitParamDecl::Create(
4737         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4738         C.getPointerType(C.getPointerType(E->getType()))
4739             .withConst()
4740             .withRestrict(),
4741         ImplicitParamDecl::Other));
4742     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4743     PrivateVarsPos[VD] = Counter;
4744     ++Counter;
4745   }
4746   for (const Expr *E : LastprivateVars) {
4747     Args.push_back(ImplicitParamDecl::Create(
4748         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4749         C.getPointerType(C.getPointerType(E->getType()))
4750             .withConst()
4751             .withRestrict(),
4752         ImplicitParamDecl::Other));
4753     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4754     PrivateVarsPos[VD] = Counter;
4755     ++Counter;
4756   }
4757   const auto &TaskPrivatesMapFnInfo =
4758       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4759   llvm::FunctionType *TaskPrivatesMapTy =
4760       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4761   std::string Name =
4762       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4763   auto *TaskPrivatesMap = llvm::Function::Create(
4764       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4765       &CGM.getModule());
4766   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4767                                     TaskPrivatesMapFnInfo);
4768   if (CGM.getLangOpts().Optimize) {
4769     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4770     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4771     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4772   }
4773   CodeGenFunction CGF(CGM);
4774   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4775                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4776 
4777   // *privi = &.privates.privi;
4778   LValue Base = CGF.EmitLoadOfPointerLValue(
4779       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4780       TaskPrivatesArg.getType()->castAs<PointerType>());
4781   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4782   Counter = 0;
4783   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4784     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4785     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4786     LValue RefLVal =
4787         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4788     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4789         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
4790     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
4791     ++Counter;
4792   }
4793   CGF.FinishFunction();
4794   return TaskPrivatesMap;
4795 }
4796 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lock-step with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD) and emits the initializer of
/// each private copy into the matching field reached through \p TDBase.
///
/// \param KmpTaskSharedsPtr Address of the shareds block; it is the source for
/// entries that carry a PrivateElemInit and are found among the captures.
/// \param TDBase LValue of the task-with-privates record being initialized.
/// \param SharedsTy Type of the shareds block.
/// \param SharedsPtrTy Pointer type used when casting \p KmpTaskSharedsPtr.
/// \param ForDup When true, only copies whose initializer is a non-trivial
/// CXXConstructExpr are emitted; every other entry is skipped.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The second field of the task-with-privates record is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Taskloop directives use the OMPD_taskloop captured statement; every other
  // directive handled here uses the OMPD_task one.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  // SrcBase is only assigned when the shareds block may be read below.
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Re-point FI at the first field of the privates record; it is advanced once
  // per entry of Privates below.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Without an initializer nothing is emitted. For the dup function only
    // non-trivial constructor calls are emitted.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit means the initializer reads the original
      // value through Elem, so Elem must be bound to the source address first.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          // Not captured: read the original directly from its local address.
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          // Captured: address the field inside the shareds block, then rebuild
          // the lvalue with the alignment of the original declaration.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Bind Elem to the current source element.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array: bind Elem to the source address and emit the
          // initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No PrivateElemInit: the initializer is emitted as written.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    // Advance even when nothing was emitted so that fields and Privates stay
    // aligned.
    ++FI;
  }
}
4903 
4904 /// Check if duplication function is required for taskloops.
4905 static bool checkInitIsRequired(CodeGenFunction &CGF,
4906                                 ArrayRef<PrivateDataTy> Privates) {
4907   bool InitRequired = false;
4908   for (const PrivateDataTy &Pair : Privates) {
4909     const VarDecl *VD = Pair.second.PrivateCopy;
4910     const Expr *Init = VD->getAnyInitializer();
4911     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4912                                     !CGF.isTrivialInitializer(Init));
4913     if (InitRequired)
4914       break;
4915   }
4916   return InitRequired;
4917 }
4918 
4919 
4920 /// Emit task_dup function (for initialization of
4921 /// private/firstprivate/lastprivate vars and last_iter flag)
4922 /// \code
4923 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4924 /// lastpriv) {
4925 /// // setup lastprivate flag
4926 ///    task_dst->last = lastpriv;
4927 /// // could be constructor calls here...
4928 /// }
4929 /// \endcode
4930 static llvm::Value *
4931 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4932                     const OMPExecutableDirective &D,
4933                     QualType KmpTaskTWithPrivatesPtrQTy,
4934                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4935                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4936                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4937                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4938   ASTContext &C = CGM.getContext();
4939   FunctionArgList Args;
4940   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4941                            KmpTaskTWithPrivatesPtrQTy,
4942                            ImplicitParamDecl::Other);
4943   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4944                            KmpTaskTWithPrivatesPtrQTy,
4945                            ImplicitParamDecl::Other);
4946   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4947                                 ImplicitParamDecl::Other);
4948   Args.push_back(&DstArg);
4949   Args.push_back(&SrcArg);
4950   Args.push_back(&LastprivArg);
4951   const auto &TaskDupFnInfo =
4952       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4953   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4954   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4955   auto *TaskDup = llvm::Function::Create(
4956       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4957   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4958   TaskDup->setDoesNotRecurse();
4959   CodeGenFunction CGF(CGM);
4960   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4961                     Loc);
4962 
4963   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4964       CGF.GetAddrOfLocalVar(&DstArg),
4965       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4966   // task_dst->liter = lastpriv;
4967   if (WithLastIter) {
4968     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4969     LValue Base = CGF.EmitLValueForField(
4970         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4971     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4972     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4973         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4974     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4975   }
4976 
4977   // Emit initial values for private copies (if any).
4978   assert(!Privates.empty());
4979   Address KmpTaskSharedsPtr = Address::invalid();
4980   if (!Data.FirstprivateVars.empty()) {
4981     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4982         CGF.GetAddrOfLocalVar(&SrcArg),
4983         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4984     LValue Base = CGF.EmitLValueForField(
4985         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4986     KmpTaskSharedsPtr = Address(
4987         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4988                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4989                                                   KmpTaskTShareds)),
4990                              Loc),
4991         CGF.getNaturalTypeAlignment(SharedsTy));
4992   }
4993   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4994                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4995   CGF.FinishFunction();
4996   return TaskDup;
4997 }
4998 
4999 /// Checks if destructor function is required to be generated.
5000 /// \return true if cleanups are required, false otherwise.
5001 static bool
5002 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
5003   bool NeedsCleanup = false;
5004   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
5005   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
5006   for (const FieldDecl *FD : PrivateRD->fields()) {
5007     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
5008     if (NeedsCleanup)
5009       break;
5010   }
5011   return NeedsCleanup;
5012 }
5013 
/// Emit the allocation and initialization of a new task object for \p D.
///
/// Collects the private/firstprivate/lastprivate copies, builds the
/// kmp_task_t-with-privates record, emits the privates mapping function and
/// the proxy task entry, calls the runtime allocator (__kmpc_omp_task_alloc,
/// or __kmpc_omp_target_task_alloc when \p D has a nowait clause), copies the
/// shareds, initializes the privates, and fills the destructors and priority
/// slots of the task.
///
/// \param TaskFunction Outlined task function wrapped by the proxy entry.
/// \param SharedsTy Type of the block of shared variables.
/// \param Shareds Address of the shareds block to copy into the new task.
/// \param Data Clause data (privates, tied/final/priority settings).
/// \return The allocated task value, the proxy task entry, the task pointer
/// cast to the with-privates type, the kmp_task_t lvalue, its record
/// declaration and, for taskloops that need one, the task duplication
/// function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Each entry pairs the alignment of the original variable with the original
  // declaration, its private copy and (firstprivates only) the helper variable
  // used by the copy's initializer.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Descending alignment; the sort is stable, so entries with equal alignment
  // keep the private/firstprivate/lastprivate order built above.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop and non-taskloop directives each cache their own record type.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // Type of the outlined task function's fourth parameter; the mapping
  // function pointer (or a null pointer) is produced with this type.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  // Flags holds the bits known at compile time; the final bit is merged in
  // below because it may depend on a runtime value.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // Data.Final carries either a value to select on (pointer part) or a
  // constant setting (int part).
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a nowait clause the target task allocator is called; it takes the
  // device id as one extra trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    // Note: this 'C' shadows the ASTContext reference 'C' inside the if.
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  // View the returned task through the with-privates record type.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  // TDBase is the kmp_task_t part, i.e. the first field of the record.
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    // Load the shareds pointer stored in the task and copy the caller's
    // shareds block into the memory it points to.
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops get a duplication function when there are lastprivates or
    // when some private copy needs a non-trivial constructor call.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  // NOTE(review): Data1 and Data2 are declared outside this function;
  // presumably they index the two kmp_cmplrdata_t members of kmp_task_t -
  // confirm against their declaration.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  // Hand the pieces back to the caller.
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
5233 
5234 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5235                                    const OMPExecutableDirective &D,
5236                                    llvm::Function *TaskFunction,
5237                                    QualType SharedsTy, Address Shareds,
5238                                    const Expr *IfCond,
5239                                    const OMPTaskDataTy &Data) {
5240   if (!CGF.HaveInsertPoint())
5241     return;
5242 
5243   TaskResultTy Result =
5244       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5245   llvm::Value *NewTask = Result.NewTask;
5246   llvm::Function *TaskEntry = Result.TaskEntry;
5247   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5248   LValue TDBase = Result.TDBase;
5249   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5250   ASTContext &C = CGM.getContext();
5251   // Process list of dependences.
5252   Address DependenciesArray = Address::invalid();
5253   unsigned NumDependencies = Data.Dependences.size();
5254   if (NumDependencies) {
5255     // Dependence kind for RTL.
5256     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
5257     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
5258     RecordDecl *KmpDependInfoRD;
5259     QualType FlagsTy =
5260         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5261     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5262     if (KmpDependInfoTy.isNull()) {
5263       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5264       KmpDependInfoRD->startDefinition();
5265       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5266       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5267       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5268       KmpDependInfoRD->completeDefinition();
5269       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5270     } else {
5271       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5272     }
5273     // Define type kmp_depend_info[<Dependences.size()>];
5274     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5275         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
5276         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5277     // kmp_depend_info[<Dependences.size()>] deps;
5278     DependenciesArray =
5279         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5280     for (unsigned I = 0; I < NumDependencies; ++I) {
5281       const Expr *E = Data.Dependences[I].second;
5282       LValue Addr = CGF.EmitLValue(E);
5283       llvm::Value *Size;
5284       QualType Ty = E->getType();
5285       if (const auto *ASE =
5286               dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5287         LValue UpAddrLVal =
5288             CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
5289         llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
5290             UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
5291         llvm::Value *LowIntPtr =
5292             CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy);
5293         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5294         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5295       } else {
5296         Size = CGF.getTypeSize(Ty);
5297       }
5298       LValue Base = CGF.MakeAddrLValue(
5299           CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
5300           KmpDependInfoTy);
5301       // deps[i].base_addr = &<Dependences[i].second>;
5302       LValue BaseAddrLVal = CGF.EmitLValueForField(
5303           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5304       CGF.EmitStoreOfScalar(
5305           CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy),
5306           BaseAddrLVal);
5307       // deps[i].len = sizeof(<Dependences[i].second>);
5308       LValue LenLVal = CGF.EmitLValueForField(
5309           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5310       CGF.EmitStoreOfScalar(Size, LenLVal);
5311       // deps[i].flags = <Dependences[i].first>;
5312       RTLDependenceKindTy DepKind;
5313       switch (Data.Dependences[I].first) {
5314       case OMPC_DEPEND_in:
5315         DepKind = DepIn;
5316         break;
5317       // Out and InOut dependencies must use the same code.
5318       case OMPC_DEPEND_out:
5319       case OMPC_DEPEND_inout:
5320         DepKind = DepInOut;
5321         break;
5322       case OMPC_DEPEND_mutexinoutset:
5323         DepKind = DepMutexInOutSet;
5324         break;
5325       case OMPC_DEPEND_source:
5326       case OMPC_DEPEND_sink:
5327       case OMPC_DEPEND_unknown:
5328         llvm_unreachable("Unknown task dependence type");
5329       }
5330       LValue FlagsLVal = CGF.EmitLValueForField(
5331           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5332       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5333                             FlagsLVal);
5334     }
5335     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5336         CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
5337   }
5338 
5339   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5340   // libcall.
5341   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5342   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5343   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5344   // list is not empty
5345   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5346   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5347   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5348   llvm::Value *DepTaskArgs[7];
5349   if (NumDependencies) {
5350     DepTaskArgs[0] = UpLoc;
5351     DepTaskArgs[1] = ThreadID;
5352     DepTaskArgs[2] = NewTask;
5353     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
5354     DepTaskArgs[4] = DependenciesArray.getPointer();
5355     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5356     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5357   }
5358   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
5359                         &TaskArgs,
5360                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5361     if (!Data.Tied) {
5362       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5363       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5364       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5365     }
5366     if (NumDependencies) {
5367       CGF.EmitRuntimeCall(
5368           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5369     } else {
5370       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5371                           TaskArgs);
5372     }
5373     // Check if parent region is untied and build return for untied task;
5374     if (auto *Region =
5375             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5376       Region->emitUntiedSwitch(CGF);
5377   };
5378 
5379   llvm::Value *DepWaitTaskArgs[6];
5380   if (NumDependencies) {
5381     DepWaitTaskArgs[0] = UpLoc;
5382     DepWaitTaskArgs[1] = ThreadID;
5383     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
5384     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5385     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5386     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5387   }
5388   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5389                         NumDependencies, &DepWaitTaskArgs,
5390                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5391     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5392     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5393     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5394     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5395     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5396     // is specified.
5397     if (NumDependencies)
5398       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5399                           DepWaitTaskArgs);
5400     // Call proxy_task_entry(gtid, new_task);
5401     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5402                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5403       Action.Enter(CGF);
5404       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5405       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5406                                                           OutlinedFnArgs);
5407     };
5408 
5409     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5410     // kmp_task_t *new_task);
5411     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5412     // kmp_task_t *new_task);
5413     RegionCodeGenTy RCG(CodeGen);
5414     CommonActionTy Action(
5415         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5416         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5417     RCG.setAction(Action);
5418     RCG(CGF);
5419   };
5420 
5421   if (IfCond) {
5422     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5423   } else {
5424     RegionCodeGenTy ThenRCG(ThenCodeGen);
5425     ThenRCG(CGF);
5426   }
5427 }
5428 
5429 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5430                                        const OMPLoopDirective &D,
5431                                        llvm::Function *TaskFunction,
5432                                        QualType SharedsTy, Address Shareds,
5433                                        const Expr *IfCond,
5434                                        const OMPTaskDataTy &Data) {
5435   if (!CGF.HaveInsertPoint())
5436     return;
5437   TaskResultTy Result =
5438       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5439   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5440   // libcall.
5441   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5442   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5443   // sched, kmp_uint64 grainsize, void *task_dup);
5444   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5445   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5446   llvm::Value *IfVal;
5447   if (IfCond) {
5448     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5449                                       /*isSigned=*/true);
5450   } else {
5451     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5452   }
5453 
5454   LValue LBLVal = CGF.EmitLValueForField(
5455       Result.TDBase,
5456       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5457   const auto *LBVar =
5458       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5459   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5460                        LBLVal.getQuals(),
5461                        /*IsInitializer=*/true);
5462   LValue UBLVal = CGF.EmitLValueForField(
5463       Result.TDBase,
5464       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5465   const auto *UBVar =
5466       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5467   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5468                        UBLVal.getQuals(),
5469                        /*IsInitializer=*/true);
5470   LValue StLVal = CGF.EmitLValueForField(
5471       Result.TDBase,
5472       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5473   const auto *StVar =
5474       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5475   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5476                        StLVal.getQuals(),
5477                        /*IsInitializer=*/true);
5478   // Store reductions address.
5479   LValue RedLVal = CGF.EmitLValueForField(
5480       Result.TDBase,
5481       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5482   if (Data.Reductions) {
5483     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5484   } else {
5485     CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5486                                CGF.getContext().VoidPtrTy);
5487   }
5488   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5489   llvm::Value *TaskArgs[] = {
5490       UpLoc,
5491       ThreadID,
5492       Result.NewTask,
5493       IfVal,
5494       LBLVal.getPointer(CGF),
5495       UBLVal.getPointer(CGF),
5496       CGF.EmitLoadOfScalar(StLVal, Loc),
5497       llvm::ConstantInt::getSigned(
5498           CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5499       llvm::ConstantInt::getSigned(
5500           CGF.IntTy, Data.Schedule.getPointer()
5501                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5502                          : NoSchedule),
5503       Data.Schedule.getPointer()
5504           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5505                                       /*isSigned=*/false)
5506           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5507       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5508                              Result.TaskDupFn, CGF.VoidPtrTy)
5509                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5510   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
5511 }
5512 
5513 /// Emit reduction operation for each element of array (required for
5514 /// array sections) LHS op = RHS.
5515 /// \param Type Type of array.
5516 /// \param LHSVar Variable on the left side of the reduction operation
5517 /// (references element of array in original variable).
5518 /// \param RHSVar Variable on the right side of the reduction operation
5519 /// (references element of array in original variable).
5520 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5521 /// RHSVar.
5522 static void EmitOMPAggregateReduction(
5523     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5524     const VarDecl *RHSVar,
5525     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5526                                   const Expr *, const Expr *)> &RedOpGen,
5527     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5528     const Expr *UpExpr = nullptr) {
5529   // Perform element-by-element initialization.
5530   QualType ElementTy;
5531   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5532   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5533 
5534   // Drill down to the base element type on both arrays.
5535   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5536   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5537 
5538   llvm::Value *RHSBegin = RHSAddr.getPointer();
5539   llvm::Value *LHSBegin = LHSAddr.getPointer();
5540   // Cast from pointer to array type to pointer to single element.
5541   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5542   // The basic structure here is a while-do loop.
5543   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5544   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5545   llvm::Value *IsEmpty =
5546       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5547   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5548 
5549   // Enter the loop body, making that address the current address.
5550   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5551   CGF.EmitBlock(BodyBB);
5552 
5553   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5554 
5555   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5556       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5557   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5558   Address RHSElementCurrent =
5559       Address(RHSElementPHI,
5560               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5561 
5562   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5563       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5564   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5565   Address LHSElementCurrent =
5566       Address(LHSElementPHI,
5567               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5568 
5569   // Emit copy.
5570   CodeGenFunction::OMPPrivateScope Scope(CGF);
5571   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5572   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5573   Scope.Privatize();
5574   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5575   Scope.ForceCleanup();
5576 
5577   // Shift the address forward by one element.
5578   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5579       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5580   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5581       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5582   // Check whether we've reached the end.
5583   llvm::Value *Done =
5584       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5585   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5586   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5587   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5588 
5589   // Done.
5590   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5591 }
5592 
5593 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5594 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5595 /// UDR combiner function.
5596 static void emitReductionCombiner(CodeGenFunction &CGF,
5597                                   const Expr *ReductionOp) {
5598   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5599     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5600       if (const auto *DRE =
5601               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5602         if (const auto *DRD =
5603                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5604           std::pair<llvm::Function *, llvm::Function *> Reduction =
5605               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5606           RValue Func = RValue::get(Reduction.first);
5607           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5608           CGF.EmitIgnoredExpr(ReductionOp);
5609           return;
5610         }
5611   CGF.EmitIgnoredExpr(ReductionOp);
5612 }
5613 
5614 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5615     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5616     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5617     ArrayRef<const Expr *> ReductionOps) {
5618   ASTContext &C = CGM.getContext();
5619 
5620   // void reduction_func(void *LHSArg, void *RHSArg);
5621   FunctionArgList Args;
5622   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5623                            ImplicitParamDecl::Other);
5624   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5625                            ImplicitParamDecl::Other);
5626   Args.push_back(&LHSArg);
5627   Args.push_back(&RHSArg);
5628   const auto &CGFI =
5629       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5630   std::string Name = getName({"omp", "reduction", "reduction_func"});
5631   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5632                                     llvm::GlobalValue::InternalLinkage, Name,
5633                                     &CGM.getModule());
5634   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5635   Fn->setDoesNotRecurse();
5636   CodeGenFunction CGF(CGM);
5637   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5638 
5639   // Dst = (void*[n])(LHSArg);
5640   // Src = (void*[n])(RHSArg);
5641   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5642       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5643       ArgsType), CGF.getPointerAlign());
5644   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5645       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5646       ArgsType), CGF.getPointerAlign());
5647 
5648   //  ...
5649   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5650   //  ...
5651   CodeGenFunction::OMPPrivateScope Scope(CGF);
5652   auto IPriv = Privates.begin();
5653   unsigned Idx = 0;
5654   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5655     const auto *RHSVar =
5656         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5657     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5658       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5659     });
5660     const auto *LHSVar =
5661         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5662     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5663       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5664     });
5665     QualType PrivTy = (*IPriv)->getType();
5666     if (PrivTy->isVariablyModifiedType()) {
5667       // Get array size and emit VLA type.
5668       ++Idx;
5669       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5670       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5671       const VariableArrayType *VLA =
5672           CGF.getContext().getAsVariableArrayType(PrivTy);
5673       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5674       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5675           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5676       CGF.EmitVariablyModifiedType(PrivTy);
5677     }
5678   }
5679   Scope.Privatize();
5680   IPriv = Privates.begin();
5681   auto ILHS = LHSExprs.begin();
5682   auto IRHS = RHSExprs.begin();
5683   for (const Expr *E : ReductionOps) {
5684     if ((*IPriv)->getType()->isArrayType()) {
5685       // Emit reduction for array section.
5686       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5687       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5688       EmitOMPAggregateReduction(
5689           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5690           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5691             emitReductionCombiner(CGF, E);
5692           });
5693     } else {
5694       // Emit reduction for array subscript or single variable.
5695       emitReductionCombiner(CGF, E);
5696     }
5697     ++IPriv;
5698     ++ILHS;
5699     ++IRHS;
5700   }
5701   Scope.ForceCleanup();
5702   CGF.FinishFunction();
5703   return Fn;
5704 }
5705 
5706 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5707                                                   const Expr *ReductionOp,
5708                                                   const Expr *PrivateRef,
5709                                                   const DeclRefExpr *LHS,
5710                                                   const DeclRefExpr *RHS) {
5711   if (PrivateRef->getType()->isArrayType()) {
5712     // Emit reduction for array section.
5713     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5714     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5715     EmitOMPAggregateReduction(
5716         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5717         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5718           emitReductionCombiner(CGF, ReductionOp);
5719         });
5720   } else {
5721     // Emit reduction for array subscript or single variable.
5722     emitReductionCombiner(CGF, ReductionOp);
5723   }
5724 }
5725 
5726 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5727                                     ArrayRef<const Expr *> Privates,
5728                                     ArrayRef<const Expr *> LHSExprs,
5729                                     ArrayRef<const Expr *> RHSExprs,
5730                                     ArrayRef<const Expr *> ReductionOps,
5731                                     ReductionOptionsTy Options) {
5732   if (!CGF.HaveInsertPoint())
5733     return;
5734 
5735   bool WithNowait = Options.WithNowait;
5736   bool SimpleReduction = Options.SimpleReduction;
5737 
5738   // Next code should be emitted for reduction:
5739   //
5740   // static kmp_critical_name lock = { 0 };
5741   //
5742   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5743   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5744   //  ...
5745   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5746   //  *(Type<n>-1*)rhs[<n>-1]);
5747   // }
5748   //
5749   // ...
5750   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5751   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5752   // RedList, reduce_func, &<lock>)) {
5753   // case 1:
5754   //  ...
5755   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5756   //  ...
5757   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5758   // break;
5759   // case 2:
5760   //  ...
5761   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5762   //  ...
5763   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5764   // break;
5765   // default:;
5766   // }
5767   //
5768   // if SimpleReduction is true, only the next code is generated:
5769   //  ...
5770   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5771   //  ...
5772 
5773   ASTContext &C = CGM.getContext();
5774 
5775   if (SimpleReduction) {
5776     CodeGenFunction::RunCleanupsScope Scope(CGF);
5777     auto IPriv = Privates.begin();
5778     auto ILHS = LHSExprs.begin();
5779     auto IRHS = RHSExprs.begin();
5780     for (const Expr *E : ReductionOps) {
5781       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5782                                   cast<DeclRefExpr>(*IRHS));
5783       ++IPriv;
5784       ++ILHS;
5785       ++IRHS;
5786     }
5787     return;
5788   }
5789 
5790   // 1. Build a list of reduction variables.
5791   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5792   auto Size = RHSExprs.size();
5793   for (const Expr *E : Privates) {
5794     if (E->getType()->isVariablyModifiedType())
5795       // Reserve place for array size.
5796       ++Size;
5797   }
5798   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5799   QualType ReductionArrayTy =
5800       C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5801                              /*IndexTypeQuals=*/0);
5802   Address ReductionList =
5803       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5804   auto IPriv = Privates.begin();
5805   unsigned Idx = 0;
5806   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5807     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5808     CGF.Builder.CreateStore(
5809         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5810             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5811         Elem);
5812     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5813       // Store array size.
5814       ++Idx;
5815       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5816       llvm::Value *Size = CGF.Builder.CreateIntCast(
5817           CGF.getVLASize(
5818                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5819               .NumElts,
5820           CGF.SizeTy, /*isSigned=*/false);
5821       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5822                               Elem);
5823     }
5824   }
5825 
5826   // 2. Emit reduce_func().
5827   llvm::Function *ReductionFn = emitReductionFunction(
5828       Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5829       LHSExprs, RHSExprs, ReductionOps);
5830 
5831   // 3. Create static kmp_critical_name lock = { 0 };
5832   std::string Name = getName({"reduction"});
5833   llvm::Value *Lock = getCriticalRegionLock(Name);
5834 
5835   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5836   // RedList, reduce_func, &<lock>);
5837   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5838   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5839   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5840   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5841       ReductionList.getPointer(), CGF.VoidPtrTy);
5842   llvm::Value *Args[] = {
5843       IdentTLoc,                             // ident_t *<loc>
5844       ThreadId,                              // i32 <gtid>
5845       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5846       ReductionArrayTySize,                  // size_type sizeof(RedList)
5847       RL,                                    // void *RedList
5848       ReductionFn, // void (*) (void *, void *) <reduce_func>
5849       Lock         // kmp_critical_name *&<lock>
5850   };
5851   llvm::Value *Res = CGF.EmitRuntimeCall(
5852       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
5853                                        : OMPRTL__kmpc_reduce),
5854       Args);
5855 
5856   // 5. Build switch(res)
5857   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5858   llvm::SwitchInst *SwInst =
5859       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5860 
5861   // 6. Build case 1:
5862   //  ...
5863   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5864   //  ...
5865   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5866   // break;
5867   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5868   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5869   CGF.EmitBlock(Case1BB);
5870 
5871   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5872   llvm::Value *EndArgs[] = {
5873       IdentTLoc, // ident_t *<loc>
5874       ThreadId,  // i32 <gtid>
5875       Lock       // kmp_critical_name *&<lock>
5876   };
5877   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5878                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5879     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5880     auto IPriv = Privates.begin();
5881     auto ILHS = LHSExprs.begin();
5882     auto IRHS = RHSExprs.begin();
5883     for (const Expr *E : ReductionOps) {
5884       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5885                                      cast<DeclRefExpr>(*IRHS));
5886       ++IPriv;
5887       ++ILHS;
5888       ++IRHS;
5889     }
5890   };
5891   RegionCodeGenTy RCG(CodeGen);
5892   CommonActionTy Action(
5893       nullptr, llvm::None,
5894       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
5895                                        : OMPRTL__kmpc_end_reduce),
5896       EndArgs);
5897   RCG.setAction(Action);
5898   RCG(CGF);
5899 
5900   CGF.EmitBranch(DefaultBB);
5901 
5902   // 7. Build case 2:
5903   //  ...
5904   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5905   //  ...
5906   // break;
5907   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5908   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5909   CGF.EmitBlock(Case2BB);
5910 
5911   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5912                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5913     auto ILHS = LHSExprs.begin();
5914     auto IRHS = RHSExprs.begin();
5915     auto IPriv = Privates.begin();
5916     for (const Expr *E : ReductionOps) {
5917       const Expr *XExpr = nullptr;
5918       const Expr *EExpr = nullptr;
5919       const Expr *UpExpr = nullptr;
5920       BinaryOperatorKind BO = BO_Comma;
5921       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5922         if (BO->getOpcode() == BO_Assign) {
5923           XExpr = BO->getLHS();
5924           UpExpr = BO->getRHS();
5925         }
5926       }
5927       // Try to emit update expression as a simple atomic.
5928       const Expr *RHSExpr = UpExpr;
5929       if (RHSExpr) {
5930         // Analyze RHS part of the whole expression.
5931         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5932                 RHSExpr->IgnoreParenImpCasts())) {
5933           // If this is a conditional operator, analyze its condition for
5934           // min/max reduction operator.
5935           RHSExpr = ACO->getCond();
5936         }
5937         if (const auto *BORHS =
5938                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5939           EExpr = BORHS->getRHS();
5940           BO = BORHS->getOpcode();
5941         }
5942       }
5943       if (XExpr) {
5944         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5945         auto &&AtomicRedGen = [BO, VD,
5946                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5947                                     const Expr *EExpr, const Expr *UpExpr) {
5948           LValue X = CGF.EmitLValue(XExpr);
5949           RValue E;
5950           if (EExpr)
5951             E = CGF.EmitAnyExpr(EExpr);
5952           CGF.EmitOMPAtomicSimpleUpdateExpr(
5953               X, E, BO, /*IsXLHSInRHSPart=*/true,
5954               llvm::AtomicOrdering::Monotonic, Loc,
5955               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5956                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5957                 PrivateScope.addPrivate(
5958                     VD, [&CGF, VD, XRValue, Loc]() {
5959                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5960                       CGF.emitOMPSimpleStore(
5961                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5962                           VD->getType().getNonReferenceType(), Loc);
5963                       return LHSTemp;
5964                     });
5965                 (void)PrivateScope.Privatize();
5966                 return CGF.EmitAnyExpr(UpExpr);
5967               });
5968         };
5969         if ((*IPriv)->getType()->isArrayType()) {
5970           // Emit atomic reduction for array section.
5971           const auto *RHSVar =
5972               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5973           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5974                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5975         } else {
5976           // Emit atomic reduction for array subscript or single variable.
5977           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5978         }
5979       } else {
5980         // Emit as a critical region.
5981         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5982                                            const Expr *, const Expr *) {
5983           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5984           std::string Name = RT.getName({"atomic_reduction"});
5985           RT.emitCriticalRegion(
5986               CGF, Name,
5987               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5988                 Action.Enter(CGF);
5989                 emitReductionCombiner(CGF, E);
5990               },
5991               Loc);
5992         };
5993         if ((*IPriv)->getType()->isArrayType()) {
5994           const auto *LHSVar =
5995               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5996           const auto *RHSVar =
5997               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5998           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5999                                     CritRedGen);
6000         } else {
6001           CritRedGen(CGF, nullptr, nullptr, nullptr);
6002         }
6003       }
6004       ++ILHS;
6005       ++IRHS;
6006       ++IPriv;
6007     }
6008   };
6009   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
6010   if (!WithNowait) {
6011     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
6012     llvm::Value *EndArgs[] = {
6013         IdentTLoc, // ident_t *<loc>
6014         ThreadId,  // i32 <gtid>
6015         Lock       // kmp_critical_name *&<lock>
6016     };
6017     CommonActionTy Action(nullptr, llvm::None,
6018                           createRuntimeFunction(OMPRTL__kmpc_end_reduce),
6019                           EndArgs);
6020     AtomicRCG.setAction(Action);
6021     AtomicRCG(CGF);
6022   } else {
6023     AtomicRCG(CGF);
6024   }
6025 
6026   CGF.EmitBranch(DefaultBB);
6027   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
6028 }
6029 
6030 /// Generates unique name for artificial threadprivate variables.
6031 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
6032 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
6033                                       const Expr *Ref) {
6034   SmallString<256> Buffer;
6035   llvm::raw_svector_ostream Out(Buffer);
6036   const clang::DeclRefExpr *DE;
6037   const VarDecl *D = ::getBaseDecl(Ref, DE);
6038   if (!D)
6039     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
6040   D = D->getCanonicalDecl();
6041   std::string Name = CGM.getOpenMPRuntime().getName(
6042       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
6043   Out << Prefix << Name << "_"
6044       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
6045   return Out.str();
6046 }
6047 
6048 /// Emits reduction initializer function:
6049 /// \code
6050 /// void @.red_init(void* %arg) {
6051 /// %0 = bitcast void* %arg to <type>*
6052 /// store <type> <init>, <type>* %0
6053 /// ret void
6054 /// }
6055 /// \endcode
6056 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
6057                                            SourceLocation Loc,
6058                                            ReductionCodeGen &RCG, unsigned N) {
6059   ASTContext &C = CGM.getContext();
6060   FunctionArgList Args;
6061   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6062                           ImplicitParamDecl::Other);
6063   Args.emplace_back(&Param);
6064   const auto &FnInfo =
6065       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6066   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6067   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
6068   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6069                                     Name, &CGM.getModule());
6070   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6071   Fn->setDoesNotRecurse();
6072   CodeGenFunction CGF(CGM);
6073   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6074   Address PrivateAddr = CGF.EmitLoadOfPointer(
6075       CGF.GetAddrOfLocalVar(&Param),
6076       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6077   llvm::Value *Size = nullptr;
6078   // If the size of the reduction item is non-constant, load it from global
6079   // threadprivate variable.
6080   if (RCG.getSizes(N).second) {
6081     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6082         CGF, CGM.getContext().getSizeType(),
6083         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6084     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6085                                 CGM.getContext().getSizeType(), Loc);
6086   }
6087   RCG.emitAggregateType(CGF, N, Size);
6088   LValue SharedLVal;
6089   // If initializer uses initializer from declare reduction construct, emit a
6090   // pointer to the address of the original reduction item (reuired by reduction
6091   // initializer)
6092   if (RCG.usesReductionInitializer(N)) {
6093     Address SharedAddr =
6094         CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6095             CGF, CGM.getContext().VoidPtrTy,
6096             generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6097     SharedAddr = CGF.EmitLoadOfPointer(
6098         SharedAddr,
6099         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
6100     SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
6101   } else {
6102     SharedLVal = CGF.MakeNaturalAlignAddrLValue(
6103         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
6104         CGM.getContext().VoidPtrTy);
6105   }
6106   // Emit the initializer:
6107   // %0 = bitcast void* %arg to <type>*
6108   // store <type> <init>, <type>* %0
6109   RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
6110                          [](CodeGenFunction &) { return false; });
6111   CGF.FinishFunction();
6112   return Fn;
6113 }
6114 
6115 /// Emits reduction combiner function:
6116 /// \code
6117 /// void @.red_comb(void* %arg0, void* %arg1) {
6118 /// %lhs = bitcast void* %arg0 to <type>*
6119 /// %rhs = bitcast void* %arg1 to <type>*
6120 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
6121 /// store <type> %2, <type>* %lhs
6122 /// ret void
6123 /// }
6124 /// \endcode
6125 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
6126                                            SourceLocation Loc,
6127                                            ReductionCodeGen &RCG, unsigned N,
6128                                            const Expr *ReductionOp,
6129                                            const Expr *LHS, const Expr *RHS,
6130                                            const Expr *PrivateRef) {
6131   ASTContext &C = CGM.getContext();
6132   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
6133   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
6134   FunctionArgList Args;
6135   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
6136                                C.VoidPtrTy, ImplicitParamDecl::Other);
6137   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6138                             ImplicitParamDecl::Other);
6139   Args.emplace_back(&ParamInOut);
6140   Args.emplace_back(&ParamIn);
6141   const auto &FnInfo =
6142       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6143   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6144   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
6145   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6146                                     Name, &CGM.getModule());
6147   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6148   Fn->setDoesNotRecurse();
6149   CodeGenFunction CGF(CGM);
6150   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6151   llvm::Value *Size = nullptr;
6152   // If the size of the reduction item is non-constant, load it from global
6153   // threadprivate variable.
6154   if (RCG.getSizes(N).second) {
6155     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6156         CGF, CGM.getContext().getSizeType(),
6157         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6158     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6159                                 CGM.getContext().getSizeType(), Loc);
6160   }
6161   RCG.emitAggregateType(CGF, N, Size);
6162   // Remap lhs and rhs variables to the addresses of the function arguments.
6163   // %lhs = bitcast void* %arg0 to <type>*
6164   // %rhs = bitcast void* %arg1 to <type>*
6165   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6166   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
6167     // Pull out the pointer to the variable.
6168     Address PtrAddr = CGF.EmitLoadOfPointer(
6169         CGF.GetAddrOfLocalVar(&ParamInOut),
6170         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6171     return CGF.Builder.CreateElementBitCast(
6172         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
6173   });
6174   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
6175     // Pull out the pointer to the variable.
6176     Address PtrAddr = CGF.EmitLoadOfPointer(
6177         CGF.GetAddrOfLocalVar(&ParamIn),
6178         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6179     return CGF.Builder.CreateElementBitCast(
6180         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
6181   });
6182   PrivateScope.Privatize();
6183   // Emit the combiner body:
6184   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6185   // store <type> %2, <type>* %lhs
6186   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6187       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6188       cast<DeclRefExpr>(RHS));
6189   CGF.FinishFunction();
6190   return Fn;
6191 }
6192 
6193 /// Emits reduction finalizer function:
6194 /// \code
6195 /// void @.red_fini(void* %arg) {
6196 /// %0 = bitcast void* %arg to <type>*
6197 /// <destroy>(<type>* %0)
6198 /// ret void
6199 /// }
6200 /// \endcode
6201 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6202                                            SourceLocation Loc,
6203                                            ReductionCodeGen &RCG, unsigned N) {
6204   if (!RCG.needCleanups(N))
6205     return nullptr;
6206   ASTContext &C = CGM.getContext();
6207   FunctionArgList Args;
6208   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6209                           ImplicitParamDecl::Other);
6210   Args.emplace_back(&Param);
6211   const auto &FnInfo =
6212       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6213   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6214   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6215   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6216                                     Name, &CGM.getModule());
6217   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6218   Fn->setDoesNotRecurse();
6219   CodeGenFunction CGF(CGM);
6220   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6221   Address PrivateAddr = CGF.EmitLoadOfPointer(
6222       CGF.GetAddrOfLocalVar(&Param),
6223       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6224   llvm::Value *Size = nullptr;
6225   // If the size of the reduction item is non-constant, load it from global
6226   // threadprivate variable.
6227   if (RCG.getSizes(N).second) {
6228     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6229         CGF, CGM.getContext().getSizeType(),
6230         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6231     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6232                                 CGM.getContext().getSizeType(), Loc);
6233   }
6234   RCG.emitAggregateType(CGF, N, Size);
6235   // Emit the finalizer body:
6236   // <destroy>(<type>* %0)
6237   RCG.emitCleanups(CGF, N, PrivateAddr);
6238   CGF.FinishFunction(Loc);
6239   return Fn;
6240 }
6241 
/// Emits the setup code for task reductions: a temporary array of
/// kmp_task_red_input_t with one element per entry of Data.ReductionVars is
/// filled in and passed to __kmpc_task_reduction_init.
/// \returns The value returned by the runtime call, or nullptr when \p CGF has
/// no insert point or there are no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  // The fields are added in the order listed in the struct sketch above.
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  // Fill one array element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    // getSizes() yields a pair; the second member is non-null only for items
    // whose size is not a compile-time constant (see the check below).
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = <size in chars, converted to size_t>;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // A custom (declare reduction) initializer also forces delayed creation.
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini;
    // (a null pointer is stored when no finalizer function was emitted)
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}
6347 
6348 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6349                                               SourceLocation Loc,
6350                                               ReductionCodeGen &RCG,
6351                                               unsigned N) {
6352   auto Sizes = RCG.getSizes(N);
6353   // Emit threadprivate global variable if the type is non-constant
6354   // (Sizes.second = nullptr).
6355   if (Sizes.second) {
6356     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6357                                                      /*isSigned=*/false);
6358     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6359         CGF, CGM.getContext().getSizeType(),
6360         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6361     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6362   }
6363   // Store address of the original reduction item if custom initializer is used.
6364   if (RCG.usesReductionInitializer(N)) {
6365     Address SharedAddr = getAddrOfArtificialThreadPrivate(
6366         CGF, CGM.getContext().VoidPtrTy,
6367         generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6368     CGF.Builder.CreateStore(
6369         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6370             RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy),
6371         SharedAddr, /*IsVolatile=*/false);
6372   }
6373 }
6374 
6375 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6376                                               SourceLocation Loc,
6377                                               llvm::Value *ReductionsPtr,
6378                                               LValue SharedLVal) {
6379   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6380   // *d);
6381   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6382                                                    CGM.IntTy,
6383                                                    /*isSigned=*/true),
6384                          ReductionsPtr,
6385                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6386                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6387   return Address(
6388       CGF.EmitRuntimeCall(
6389           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6390       SharedLVal.getAlignment());
6391 }
6392 
6393 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6394                                        SourceLocation Loc) {
6395   if (!CGF.HaveInsertPoint())
6396     return;
6397   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6398   // global_tid);
6399   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6400   // Ignore return result until untied tasks are supported.
6401   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6402   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6403     Region->emitUntiedSwitch(CGF);
6404 }
6405 
/// Emits the body of an OpenMP directive of kind \p InnerKind in place,
/// using \p CodeGen as the body generator. Does nothing when \p CGF has no
/// insert point.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // NOTE(review): the RAII object presumably installs an inlined-region info
  // as CGF.CapturedStmtInfo for the lifetime of this scope - confirm against
  // InlinedOpenMPRegionRAII. The body is then emitted through that info.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
6415 
namespace {
/// Values passed as the 'cncl_kind' argument of the __kmpc_cancel and
/// __kmpc_cancellationpoint runtime calls. The numeric values are spelled out
/// explicitly because they are handed to the runtime as plain integers.
enum RTCancelKind {
  /// Initial value used before a region kind has been selected.
  CancelNoreq = 0,
  /// Selected for OMPD_parallel.
  CancelParallel = 1,
  /// Selected for OMPD_for.
  CancelLoop = 2,
  /// Selected for OMPD_sections.
  CancelSections = 3,
  /// Selected for OMPD_taskgroup.
  CancelTaskgroup = 4,
};
} // anonymous namespace
6425 
6426 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6427   RTCancelKind CancelKind = CancelNoreq;
6428   if (CancelRegion == OMPD_parallel)
6429     CancelKind = CancelParallel;
6430   else if (CancelRegion == OMPD_for)
6431     CancelKind = CancelLoop;
6432   else if (CancelRegion == OMPD_sections)
6433     CancelKind = CancelSections;
6434   else {
6435     assert(CancelRegion == OMPD_taskgroup);
6436     CancelKind = CancelTaskgroup;
6437   }
6438   return CancelKind;
6439 }
6440 
6441 void CGOpenMPRuntime::emitCancellationPointCall(
6442     CodeGenFunction &CGF, SourceLocation Loc,
6443     OpenMPDirectiveKind CancelRegion) {
6444   if (!CGF.HaveInsertPoint())
6445     return;
6446   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6447   // global_tid, kmp_int32 cncl_kind);
6448   if (auto *OMPRegionInfo =
6449           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6450     // For 'cancellation point taskgroup', the task region info may not have a
6451     // cancel. This may instead happen in another adjacent task.
6452     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6453       llvm::Value *Args[] = {
6454           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6455           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6456       // Ignore return result until untied tasks are supported.
6457       llvm::Value *Result = CGF.EmitRuntimeCall(
6458           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
6459       // if (__kmpc_cancellationpoint()) {
6460       //   exit from construct;
6461       // }
6462       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6463       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6464       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6465       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6466       CGF.EmitBlock(ExitBB);
6467       // exit from construct;
6468       CodeGenFunction::JumpDest CancelDest =
6469           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6470       CGF.EmitBranchThroughCleanup(CancelDest);
6471       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6472     }
6473   }
6474 }
6475 
6476 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6477                                      const Expr *IfCond,
6478                                      OpenMPDirectiveKind CancelRegion) {
6479   if (!CGF.HaveInsertPoint())
6480     return;
6481   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6482   // kmp_int32 cncl_kind);
6483   if (auto *OMPRegionInfo =
6484           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6485     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
6486                                                         PrePostActionTy &) {
6487       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6488       llvm::Value *Args[] = {
6489           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6490           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6491       // Ignore return result until untied tasks are supported.
6492       llvm::Value *Result = CGF.EmitRuntimeCall(
6493           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
6494       // if (__kmpc_cancel()) {
6495       //   exit from construct;
6496       // }
6497       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6498       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6499       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6500       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6501       CGF.EmitBlock(ExitBB);
6502       // exit from construct;
6503       CodeGenFunction::JumpDest CancelDest =
6504           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6505       CGF.EmitBranchThroughCleanup(CancelDest);
6506       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6507     };
6508     if (IfCond) {
6509       emitIfClause(CGF, IfCond, ThenGen,
6510                    [](CodeGenFunction &, PrePostActionTy &) {});
6511     } else {
6512       RegionCodeGenTy ThenRCG(ThenGen);
6513       ThenRCG(CGF);
6514     }
6515   }
6516 }
6517 
/// Entry point for outlining a target region: records that a target region
/// has been emitted and forwards all arguments unchanged to
/// emitTargetOutlinedFunctionHelper, which fills in \p OutlinedFn and
/// \p OutlinedFnID.
/// \param ParentName Must not be empty (asserted).
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  // Remember that at least one target region was emitted.
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
6527 
6528 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6529     const OMPExecutableDirective &D, StringRef ParentName,
6530     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6531     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6532   // Create a unique name for the entry function using the source location
6533   // information of the current target region. The name will be something like:
6534   //
6535   // __omp_offloading_DD_FFFF_PP_lBB
6536   //
6537   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6538   // mangled name of the function that encloses the target region and BB is the
6539   // line number of the target region.
6540 
6541   unsigned DeviceID;
6542   unsigned FileID;
6543   unsigned Line;
6544   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6545                            Line);
6546   SmallString<64> EntryFnName;
6547   {
6548     llvm::raw_svector_ostream OS(EntryFnName);
6549     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6550        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6551   }
6552 
6553   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6554 
6555   CodeGenFunction CGF(CGM, true);
6556   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6557   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6558 
6559   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
6560 
6561   // If this target outline function is not an offload entry, we don't need to
6562   // register it.
6563   if (!IsOffloadEntry)
6564     return;
6565 
6566   // The target region ID is used by the runtime library to identify the current
6567   // target region, so it only has to be unique and not necessarily point to
6568   // anything. It could be the pointer to the outlined function that implements
6569   // the target region, but we aren't using that so that the compiler doesn't
6570   // need to keep that, and could therefore inline the host function if proven
6571   // worthwhile during optimization. In the other hand, if emitting code for the
6572   // device, the ID has to be the function address so that it can retrieved from
6573   // the offloading entry and launched by the runtime library. We also mark the
6574   // outlined function to have external linkage in case we are emitting code for
6575   // the device, because these functions will be entry points to the device.
6576 
6577   if (CGM.getLangOpts().OpenMPIsDevice) {
6578     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6579     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6580     OutlinedFn->setDSOLocal(false);
6581   } else {
6582     std::string Name = getName({EntryFnName, "region_id"});
6583     OutlinedFnID = new llvm::GlobalVariable(
6584         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6585         llvm::GlobalValue::WeakAnyLinkage,
6586         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6587   }
6588 
6589   // Register the information for the entry associated with this target region.
6590   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6591       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6592       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6593 }
6594 
6595 /// Checks if the expression is constant or does not have non-trivial function
6596 /// calls.
6597 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6598   // We can skip constant expressions.
6599   // We can skip expressions with trivial calls or simple expressions.
6600   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6601           !E->hasNonTrivialCall(Ctx)) &&
6602          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6603 }
6604 
6605 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6606                                                     const Stmt *Body) {
6607   const Stmt *Child = Body->IgnoreContainers();
6608   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6609     Child = nullptr;
6610     for (const Stmt *S : C->body()) {
6611       if (const auto *E = dyn_cast<Expr>(S)) {
6612         if (isTrivial(Ctx, E))
6613           continue;
6614       }
6615       // Some of the statements can be ignored.
6616       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6617           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6618         continue;
6619       // Analyze declarations.
6620       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6621         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6622               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6623                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6624                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6625                   isa<UsingDirectiveDecl>(D) ||
6626                   isa<OMPDeclareReductionDecl>(D) ||
6627                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6628                 return true;
6629               const auto *VD = dyn_cast<VarDecl>(D);
6630               if (!VD)
6631                 return false;
6632               return VD->isConstexpr() ||
6633                      ((VD->getType().isTrivialType(Ctx) ||
6634                        VD->getType()->isReferenceType()) &&
6635                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6636             }))
6637           continue;
6638       }
6639       // Found multiple children - cannot get the one child only.
6640       if (Child)
6641         return nullptr;
6642       Child = S;
6643     }
6644     if (Child)
6645       Child = Child->IgnoreContainers();
6646   }
6647   return Child;
6648 }
6649 
6650 /// Emit the number of teams for a target directive.  Inspect the num_teams
6651 /// clause associated with a teams construct combined or closely nested
6652 /// with the target directive.
6653 ///
6654 /// Emit a team of size one for directives such as 'target parallel' that
6655 /// have no associated teams construct.
6656 ///
6657 /// Otherwise, return nullptr.
6658 static llvm::Value *
6659 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6660                                const OMPExecutableDirective &D) {
6661   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6662          "Clauses associated with the teams directive expected to be emitted "
6663          "only for the host!");
6664   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6665   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6666          "Expected target-based executable directive.");
6667   CGBuilderTy &Bld = CGF.Builder;
6668   switch (DirectiveKind) {
6669   case OMPD_target: {
6670     const auto *CS = D.getInnermostCapturedStmt();
6671     const auto *Body =
6672         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6673     const Stmt *ChildStmt =
6674         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6675     if (const auto *NestedDir =
6676             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6677       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6678         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6679           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6680           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6681           const Expr *NumTeams =
6682               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6683           llvm::Value *NumTeamsVal =
6684               CGF.EmitScalarExpr(NumTeams,
6685                                  /*IgnoreResultAssign*/ true);
6686           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6687                                    /*isSigned=*/true);
6688         }
6689         return Bld.getInt32(0);
6690       }
6691       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6692           isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6693         return Bld.getInt32(1);
6694       return Bld.getInt32(0);
6695     }
6696     return nullptr;
6697   }
6698   case OMPD_target_teams:
6699   case OMPD_target_teams_distribute:
6700   case OMPD_target_teams_distribute_simd:
6701   case OMPD_target_teams_distribute_parallel_for:
6702   case OMPD_target_teams_distribute_parallel_for_simd: {
6703     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6704       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6705       const Expr *NumTeams =
6706           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6707       llvm::Value *NumTeamsVal =
6708           CGF.EmitScalarExpr(NumTeams,
6709                              /*IgnoreResultAssign*/ true);
6710       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6711                                /*isSigned=*/true);
6712     }
6713     return Bld.getInt32(0);
6714   }
6715   case OMPD_target_parallel:
6716   case OMPD_target_parallel_for:
6717   case OMPD_target_parallel_for_simd:
6718   case OMPD_target_simd:
6719     return Bld.getInt32(1);
6720   case OMPD_parallel:
6721   case OMPD_for:
6722   case OMPD_parallel_for:
6723   case OMPD_parallel_master:
6724   case OMPD_parallel_sections:
6725   case OMPD_for_simd:
6726   case OMPD_parallel_for_simd:
6727   case OMPD_cancel:
6728   case OMPD_cancellation_point:
6729   case OMPD_ordered:
6730   case OMPD_threadprivate:
6731   case OMPD_allocate:
6732   case OMPD_task:
6733   case OMPD_simd:
6734   case OMPD_sections:
6735   case OMPD_section:
6736   case OMPD_single:
6737   case OMPD_master:
6738   case OMPD_critical:
6739   case OMPD_taskyield:
6740   case OMPD_barrier:
6741   case OMPD_taskwait:
6742   case OMPD_taskgroup:
6743   case OMPD_atomic:
6744   case OMPD_flush:
6745   case OMPD_teams:
6746   case OMPD_target_data:
6747   case OMPD_target_exit_data:
6748   case OMPD_target_enter_data:
6749   case OMPD_distribute:
6750   case OMPD_distribute_simd:
6751   case OMPD_distribute_parallel_for:
6752   case OMPD_distribute_parallel_for_simd:
6753   case OMPD_teams_distribute:
6754   case OMPD_teams_distribute_simd:
6755   case OMPD_teams_distribute_parallel_for:
6756   case OMPD_teams_distribute_parallel_for_simd:
6757   case OMPD_target_update:
6758   case OMPD_declare_simd:
6759   case OMPD_declare_variant:
6760   case OMPD_declare_target:
6761   case OMPD_end_declare_target:
6762   case OMPD_declare_reduction:
6763   case OMPD_declare_mapper:
6764   case OMPD_taskloop:
6765   case OMPD_taskloop_simd:
6766   case OMPD_master_taskloop:
6767   case OMPD_master_taskloop_simd:
6768   case OMPD_parallel_master_taskloop:
6769   case OMPD_parallel_master_taskloop_simd:
6770   case OMPD_requires:
6771   case OMPD_unknown:
6772     break;
6773   }
6774   llvm_unreachable("Unexpected directive kind.");
6775 }
6776 
6777 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6778                                   llvm::Value *DefaultThreadLimitVal) {
6779   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6780       CGF.getContext(), CS->getCapturedStmt());
6781   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6782     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6783       llvm::Value *NumThreads = nullptr;
6784       llvm::Value *CondVal = nullptr;
6785       // Handle if clause. If if clause present, the number of threads is
6786       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6787       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6788         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6789         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6790         const OMPIfClause *IfClause = nullptr;
6791         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6792           if (C->getNameModifier() == OMPD_unknown ||
6793               C->getNameModifier() == OMPD_parallel) {
6794             IfClause = C;
6795             break;
6796           }
6797         }
6798         if (IfClause) {
6799           const Expr *Cond = IfClause->getCondition();
6800           bool Result;
6801           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6802             if (!Result)
6803               return CGF.Builder.getInt32(1);
6804           } else {
6805             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6806             if (const auto *PreInit =
6807                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6808               for (const auto *I : PreInit->decls()) {
6809                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6810                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6811                 } else {
6812                   CodeGenFunction::AutoVarEmission Emission =
6813                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6814                   CGF.EmitAutoVarCleanups(Emission);
6815                 }
6816               }
6817             }
6818             CondVal = CGF.EvaluateExprAsBool(Cond);
6819           }
6820         }
6821       }
6822       // Check the value of num_threads clause iff if clause was not specified
6823       // or is not evaluated to false.
6824       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6825         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6826         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6827         const auto *NumThreadsClause =
6828             Dir->getSingleClause<OMPNumThreadsClause>();
6829         CodeGenFunction::LexicalScope Scope(
6830             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6831         if (const auto *PreInit =
6832                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6833           for (const auto *I : PreInit->decls()) {
6834             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6835               CGF.EmitVarDecl(cast<VarDecl>(*I));
6836             } else {
6837               CodeGenFunction::AutoVarEmission Emission =
6838                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6839               CGF.EmitAutoVarCleanups(Emission);
6840             }
6841           }
6842         }
6843         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6844         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6845                                                /*isSigned=*/false);
6846         if (DefaultThreadLimitVal)
6847           NumThreads = CGF.Builder.CreateSelect(
6848               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6849               DefaultThreadLimitVal, NumThreads);
6850       } else {
6851         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6852                                            : CGF.Builder.getInt32(0);
6853       }
6854       // Process condition of the if clause.
6855       if (CondVal) {
6856         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6857                                               CGF.Builder.getInt32(1));
6858       }
6859       return NumThreads;
6860     }
6861     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6862       return CGF.Builder.getInt32(1);
6863     return DefaultThreadLimitVal;
6864   }
6865   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6866                                : CGF.Builder.getInt32(0);
6867 }
6868 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// Values are emitted as unsigned 32-bit integers. Constant 1 is returned for
/// simd-only forms, and constant 0 where neither clause supplies a value.
/// Must only be called when compiling for the host (asserted below).
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  // Value of the thread_limit clause, if one is found; null otherwise.
  llvm::Value *ThreadLimitVal = nullptr;
  // Value of the num_threads clause; only used by the combined
  // 'target ... parallel' cases.
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': everything has to be derived from the nested directive.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    // A non-null answer from the directly nested statement is final. Null
    // means the single child is a directive that is neither parallel nor
    // simd, so keep looking.
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        // The clause belongs to the nested directive, so emit it with the
        // captured-statement info of the enclosing target region installed.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit the clause's pre-init declarations first. Declarations marked
        // OMPCaptureNoInitAttr only get their alloca and cleanups.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // A nested teams directive that is not combined with distribute: step
      // one level deeper to its own single child (Dir may become null here).
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A non-simd distribute directive: inspect what is nested inside it,
      // using the thread limit found so far as the default.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // simd forms yield exactly one thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // thread_limit, if present, is on the combined directive itself.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    // Look through a directly nested plain 'distribute' as well.
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    // The result of getNumThreads is returned as is. It is null when there is
    // no thread_limit clause and the nested directive is neither parallel nor
    // simd.
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined forms that include 'parallel': if, thread_limit and
    // num_threads are all read from the directive itself.
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Use the first if clause with no name modifier or with 'parallel'.
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Condition folded to a constant: false means exactly one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          // Condition must be evaluated at run time.
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // With both clauses present, take the unsigned minimum of the two;
      // otherwise num_threads alone determines the value.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // Apply the run-time if condition: a false condition selects 1.
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd-only forms yield exactly one thread.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    // Not a target execution directive; rejected after the switch.
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7087 
7088 namespace {
7089 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7090 
7091 // Utility to handle information from clauses associated with a given
7092 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7093 // It provides a convenient interface to obtain the information and generate
7094 // code for that information.
7095 class MappableExprsHandler {
7096 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. Each flag below MEMBER_OF occupies a single bit so that flags
  /// can be OR-ed together; the underlying type is 64 bits wide.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class. Unlike the flags above this is a multi-bit field.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    // Marks this enum as a bitmask enum, with OMP_MAP_MEMBER_OF as its largest
    // flag.
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7137 
7138   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7139   static unsigned getFlagMemberOffset() {
7140     unsigned Offset = 0;
7141     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7142          Remain = Remain >> 1)
7143       Offset++;
7144     return Offset;
7145   }
7146 
7147   /// Class that associates information with a base pointer to be passed to the
7148   /// runtime library.
7149   class BasePointerInfo {
7150     /// The base pointer.
7151     llvm::Value *Ptr = nullptr;
7152     /// The base declaration that refers to this device pointer, or null if
7153     /// there is none.
7154     const ValueDecl *DevPtrDecl = nullptr;
7155 
7156   public:
7157     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7158         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7159     llvm::Value *operator*() const { return Ptr; }
7160     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7161     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7162   };
7163 
  /// Sequence of base pointers, each with its optional device pointer
  /// declaration.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// Sequence of plain LLVM values.
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// Sequence of offload mapping flag sets.
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7167 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Lowest mapped element as (field index, address). Defaults to field
    /// index 0 with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element as (field index, address). Defaults to field
    /// index 0 with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Address of the struct itself (presumably; confirm against the code
    /// that fills it in). Invalid until set.
    Address Base = Address::invalid();
  };
7179 
7180 private:
  /// Information recorded for one mappable expression: its component list,
  /// the map type and modifiers, whether a device pointer has to be returned
  /// for it, and whether the map is implicit.
  struct MapInfo {
    /// Components of the mappable expression.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type; OMPC_MAP_unknown until set.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map modifiers. This is an ArrayRef, so the referenced storage is not
    /// owned by MapInfo and must outlive it.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Whether the device pointer has to be returned for this entry.
    bool ReturnDevicePointer = false;
    /// Whether the map is implicit.
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7198 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed. This record holds what is
  /// needed to emit the entry later.
  struct DeferredDevicePtrEntryTy {
    /// Expression associated with the deferred entry.
    const Expr *IE = nullptr;
    /// Declaration associated with the deferred entry.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7209 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for. Held by reference, so the
  /// CodeGenFunction must outlive this handler.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7230 
7231   llvm::Value *getExprTypeSize(const Expr *E) const {
7232     QualType ExprTy = E->getType().getCanonicalType();
7233 
7234     // Reference types are ignored for mapping purposes.
7235     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7236       ExprTy = RefTy->getPointeeType().getCanonicalType();
7237 
7238     // Given that an array section is considered a built-in type, we need to
7239     // do the calculation based on the length of the section instead of relying
7240     // on CGF.getTypeSize(E->getType()).
7241     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7242       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7243                             OAE->getBase()->IgnoreParenImpCasts())
7244                             .getCanonicalType();
7245 
7246       // If there is no length associated with the expression and lower bound is
7247       // not specified too, that means we are using the whole length of the
7248       // base.
7249       if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
7250           !OAE->getLowerBound())
7251         return CGF.getTypeSize(BaseTy);
7252 
7253       llvm::Value *ElemSize;
7254       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7255         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7256       } else {
7257         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7258         assert(ATy && "Expecting array type if not a pointer type.");
7259         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7260       }
7261 
7262       // If we don't have a length at this point, that is because we have an
7263       // array section with a single element.
7264       if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
7265         return ElemSize;
7266 
7267       if (const Expr *LenExpr = OAE->getLength()) {
7268         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7269         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7270                                              CGF.getContext().getSizeType(),
7271                                              LenExpr->getExprLoc());
7272         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7273       }
7274       assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
7275              OAE->getLowerBound() && "expected array_section[lb:].");
7276       // Size = sizetype - lb * elemtype;
7277       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7278       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7279       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7280                                        CGF.getContext().getSizeType(),
7281                                        OAE->getLowerBound()->getExprLoc());
7282       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7283       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7284       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7285       LengthVal = CGF.Builder.CreateSelect(
7286           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7287       return LengthVal;
7288     }
7289     return CGF.getTypeSize(ExprTy);
7290   }
7291 
7292   /// Return the corresponding bits for a given map clause modifier. Add
7293   /// a flag marking the map as a pointer if requested. Add a flag marking the
7294   /// map as the first one of a series of maps that relate to the same map
7295   /// expression.
7296   OpenMPOffloadMappingFlags getMapTypeBits(
7297       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7298       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7299     OpenMPOffloadMappingFlags Bits =
7300         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7301     switch (MapType) {
7302     case OMPC_MAP_alloc:
7303     case OMPC_MAP_release:
7304       // alloc and release is the default behavior in the runtime library,  i.e.
7305       // if we don't pass any bits alloc/release that is what the runtime is
7306       // going to do. Therefore, we don't need to signal anything for these two
7307       // type modifiers.
7308       break;
7309     case OMPC_MAP_to:
7310       Bits |= OMP_MAP_TO;
7311       break;
7312     case OMPC_MAP_from:
7313       Bits |= OMP_MAP_FROM;
7314       break;
7315     case OMPC_MAP_tofrom:
7316       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7317       break;
7318     case OMPC_MAP_delete:
7319       Bits |= OMP_MAP_DELETE;
7320       break;
7321     case OMPC_MAP_unknown:
7322       llvm_unreachable("Unexpected map type!");
7323     }
7324     if (AddPtrFlag)
7325       Bits |= OMP_MAP_PTR_AND_OBJ;
7326     if (AddIsTargetParamFlag)
7327       Bits |= OMP_MAP_TARGET_PARAM;
7328     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7329         != MapModifiers.end())
7330       Bits |= OMP_MAP_ALWAYS;
7331     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7332         != MapModifiers.end())
7333       Bits |= OMP_MAP_CLOSE;
7334     return Bits;
7335   }
7336 
7337   /// Return true if the provided expression is a final array section. A
7338   /// final array section, is one whose length can't be proved to be one.
7339   bool isFinalArraySectionExpression(const Expr *E) const {
7340     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7341 
7342     // It is not an array section and therefore not a unity-size one.
7343     if (!OASE)
7344       return false;
7345 
7346     // An array section with no colon always refer to a single element.
7347     if (OASE->getColonLoc().isInvalid())
7348       return false;
7349 
7350     const Expr *Length = OASE->getLength();
7351 
7352     // If we don't have a length we have to check if the array has size 1
7353     // for this dimension. Also, we should always expect a length if the
7354     // base type is pointer.
7355     if (!Length) {
7356       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7357                              OASE->getBase()->IgnoreParenImpCasts())
7358                              .getCanonicalType();
7359       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7360         return ATy->getSize().getSExtValue() != 1;
7361       // If we don't have a constant dimension length, we have to consider
7362       // the current section as having any size, so it is not necessarily
7363       // unitary. If it happen to be unity size, that's user fault.
7364       return true;
7365     }
7366 
7367     // Check if the length evaluates to 1.
7368     Expr::EvalResult Result;
7369     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7370       return true; // Can have more that size 1.
7371 
7372     llvm::APSInt ConstLength = Result.Val.getInt();
7373     return ConstLength.getSExtValue() != 1;
7374   }
7375 
7376   /// Generate the base pointers, section pointers, sizes and map type
7377   /// bits for the provided map type, map modifier, and expression components.
7378   /// \a IsFirstComponent should be set to true if the provided set of
7379   /// components is the first associated with a capture.
7380   void generateInfoForComponentList(
7381       OpenMPMapClauseKind MapType,
7382       ArrayRef<OpenMPMapModifierKind> MapModifiers,
7383       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7384       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7385       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7386       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7387       bool IsImplicit,
7388       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7389           OverlappedElements = llvm::None) const {
7390     // The following summarizes what has to be generated for each map and the
7391     // types below. The generated information is expressed in this order:
7392     // base pointer, section pointer, size, flags
7393     // (to add to the ones that come from the map type and modifier).
7394     //
7395     // double d;
7396     // int i[100];
7397     // float *p;
7398     //
7399     // struct S1 {
7400     //   int i;
7401     //   float f[50];
7402     // }
7403     // struct S2 {
7404     //   int i;
7405     //   float f[50];
7406     //   S1 s;
7407     //   double *p;
7408     //   struct S2 *ps;
7409     // }
7410     // S2 s;
7411     // S2 *ps;
7412     //
7413     // map(d)
7414     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7415     //
7416     // map(i)
7417     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7418     //
7419     // map(i[1:23])
7420     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7421     //
7422     // map(p)
7423     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7424     //
7425     // map(p[1:24])
7426     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7427     //
7428     // map(s)
7429     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7430     //
7431     // map(s.i)
7432     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7433     //
7434     // map(s.s.f)
7435     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7436     //
7437     // map(s.p)
7438     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7439     //
7440     // map(to: s.p[:22])
7441     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7442     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7443     // &(s.p), &(s.p[0]), 22*sizeof(double),
7444     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7445     // (*) alloc space for struct members, only this is a target parameter
7446     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7447     //      optimizes this entry out, same in the examples below)
7448     // (***) map the pointee (map: to)
7449     //
7450     // map(s.ps)
7451     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7452     //
7453     // map(from: s.ps->s.i)
7454     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7455     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7456     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7457     //
7458     // map(to: s.ps->ps)
7459     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7460     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7461     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7462     //
7463     // map(s.ps->ps->ps)
7464     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7465     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7466     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7467     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7468     //
7469     // map(to: s.ps->ps->s.f[:22])
7470     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7471     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7472     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7473     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7474     //
7475     // map(ps)
7476     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7477     //
7478     // map(ps->i)
7479     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7480     //
7481     // map(ps->s.f)
7482     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7483     //
7484     // map(from: ps->p)
7485     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7486     //
7487     // map(to: ps->p[:22])
7488     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7489     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7490     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7491     //
7492     // map(ps->ps)
7493     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7494     //
7495     // map(from: ps->ps->s.i)
7496     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7497     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7498     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7499     //
7500     // map(from: ps->ps->ps)
7501     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7502     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7503     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7504     //
7505     // map(ps->ps->ps->ps)
7506     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7507     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7508     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7509     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7510     //
7511     // map(to: ps->ps->ps->s.f[:22])
7512     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7513     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7514     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7515     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7516     //
7517     // map(to: s.f[:22]) map(from: s.p[:33])
7518     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7519     //     sizeof(double*) (**), TARGET_PARAM
7520     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7521     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7522     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7523     // (*) allocate contiguous space needed to fit all mapped members even if
7524     //     we allocate space for members not mapped (in this example,
7525     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7526     //     them as well because they fall between &s.f[0] and &s.p)
7527     //
7528     // map(from: s.f[:22]) map(to: ps->p[:33])
7529     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7530     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7531     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7532     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7533     // (*) the struct this entry pertains to is the 2nd element in the list of
7534     //     arguments, hence MEMBER_OF(2)
7535     //
7536     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7537     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7538     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7539     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7540     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7541     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7542     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7543     // (*) the struct this entry pertains to is the 4th element in the list
7544     //     of arguments, hence MEMBER_OF(4)
7545 
7546     // Track if the map information being generated is the first for a capture.
7547     bool IsCaptureFirstInfo = IsFirstComponentList;
7548     // When the variable is on a declare target link or in a to clause with
7549     // unified memory, a reference is needed to hold the host/device address
7550     // of the variable.
7551     bool RequiresReference = false;
7552 
7553     // Scan the components from the base to the complete expression.
7554     auto CI = Components.rbegin();
7555     auto CE = Components.rend();
7556     auto I = CI;
7557 
7558     // Track if the map information being generated is the first for a list of
7559     // components.
7560     bool IsExpressionFirstInfo = true;
7561     Address BP = Address::invalid();
7562     const Expr *AssocExpr = I->getAssociatedExpression();
7563     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7564     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7565 
7566     if (isa<MemberExpr>(AssocExpr)) {
7567       // The base is the 'this' pointer. The content of the pointer is going
7568       // to be the base of the field being mapped.
7569       BP = CGF.LoadCXXThisAddress();
7570     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7571                (OASE &&
7572                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7573       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7574     } else {
7575       // The base is the reference to the variable.
7576       // BP = &Var.
7577       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7578       if (const auto *VD =
7579               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7580         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7581                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7582           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7583               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7584                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7585             RequiresReference = true;
7586             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7587           }
7588         }
7589       }
7590 
7591       // If the variable is a pointer and is being dereferenced (i.e. is not
7592       // the last component), the base has to be the pointer itself, not its
7593       // reference. References are ignored for mapping purposes.
7594       QualType Ty =
7595           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7596       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7597         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7598 
7599         // We do not need to generate individual map information for the
7600         // pointer, it can be associated with the combined storage.
7601         ++I;
7602       }
7603     }
7604 
7605     // Track whether a component of the list should be marked as MEMBER_OF some
7606     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7607     // in a component list should be marked as MEMBER_OF, all subsequent entries
7608     // do not belong to the base struct. E.g.
7609     // struct S2 s;
7610     // s.ps->ps->ps->f[:]
7611     //   (1) (2) (3) (4)
7612     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7613     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7614     // is the pointee of ps(2) which is not member of struct s, so it should not
7615     // be marked as such (it is still PTR_AND_OBJ).
7616     // The variable is initialized to false so that PTR_AND_OBJ entries which
7617     // are not struct members are not considered (e.g. array of pointers to
7618     // data).
7619     bool ShouldBeMemberOf = false;
7620 
7621     // Variable keeping track of whether or not we have encountered a component
7622     // in the component list which is a member expression. Useful when we have a
7623     // pointer or a final array section, in which case it is the previous
7624     // component in the list which tells us whether we have a member expression.
7625     // E.g. X.f[:]
7626     // While processing the final array section "[:]" it is "f" which tells us
7627     // whether we are dealing with a member of a declared struct.
7628     const MemberExpr *EncounteredME = nullptr;
7629 
7630     for (; I != CE; ++I) {
7631       // If the current component is member of a struct (parent struct) mark it.
7632       if (!EncounteredME) {
7633         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7634         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7635         // as MEMBER_OF the parent struct.
7636         if (EncounteredME)
7637           ShouldBeMemberOf = true;
7638       }
7639 
7640       auto Next = std::next(I);
7641 
7642       // We need to generate the addresses and sizes if this is the last
7643       // component, if the component is a pointer or if it is an array section
7644       // whose length can't be proved to be one. If this is a pointer, it
7645       // becomes the base address for the following components.
7646 
7647       // A final array section, is one whose length can't be proved to be one.
7648       bool IsFinalArraySection =
7649           isFinalArraySectionExpression(I->getAssociatedExpression());
7650 
7651       // Get information on whether the element is a pointer. Have to do a
7652       // special treatment for array sections given that they are built-in
7653       // types.
7654       const auto *OASE =
7655           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7656       bool IsPointer =
7657           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7658                        .getCanonicalType()
7659                        ->isAnyPointerType()) ||
7660           I->getAssociatedExpression()->getType()->isAnyPointerType();
7661 
7662       if (Next == CE || IsPointer || IsFinalArraySection) {
7663         // If this is not the last component, we expect the pointer to be
7664         // associated with an array expression or member expression.
7665         assert((Next == CE ||
7666                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7667                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7668                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
7669                "Unexpected expression");
7670 
7671         Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7672                          .getAddress(CGF);
7673 
7674         // If this component is a pointer inside the base struct then we don't
7675         // need to create any entry for it - it will be combined with the object
7676         // it is pointing to into a single PTR_AND_OBJ entry.
7677         bool IsMemberPointer =
7678             IsPointer && EncounteredME &&
7679             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7680              EncounteredME);
7681         if (!OverlappedElements.empty()) {
7682           // Handle base element with the info for overlapped elements.
7683           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7684           assert(Next == CE &&
7685                  "Expected last element for the overlapped elements.");
7686           assert(!IsPointer &&
7687                  "Unexpected base element with the pointer type.");
7688           // Mark the whole struct as the struct that requires allocation on the
7689           // device.
7690           PartialStruct.LowestElem = {0, LB};
7691           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7692               I->getAssociatedExpression()->getType());
7693           Address HB = CGF.Builder.CreateConstGEP(
7694               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7695                                                               CGF.VoidPtrTy),
7696               TypeSize.getQuantity() - 1);
7697           PartialStruct.HighestElem = {
7698               std::numeric_limits<decltype(
7699                   PartialStruct.HighestElem.first)>::max(),
7700               HB};
7701           PartialStruct.Base = BP;
7702           // Emit data for non-overlapped data.
7703           OpenMPOffloadMappingFlags Flags =
7704               OMP_MAP_MEMBER_OF |
7705               getMapTypeBits(MapType, MapModifiers, IsImplicit,
7706                              /*AddPtrFlag=*/false,
7707                              /*AddIsTargetParamFlag=*/false);
7708           LB = BP;
7709           llvm::Value *Size = nullptr;
7710           // Do bitcopy of all non-overlapped structure elements.
7711           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7712                    Component : OverlappedElements) {
7713             Address ComponentLB = Address::invalid();
7714             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7715                  Component) {
7716               if (MC.getAssociatedDeclaration()) {
7717                 ComponentLB =
7718                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7719                         .getAddress(CGF);
7720                 Size = CGF.Builder.CreatePtrDiff(
7721                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7722                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7723                 break;
7724               }
7725             }
7726             BasePointers.push_back(BP.getPointer());
7727             Pointers.push_back(LB.getPointer());
7728             Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
7729                                                       /*isSigned=*/true));
7730             Types.push_back(Flags);
7731             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7732           }
7733           BasePointers.push_back(BP.getPointer());
7734           Pointers.push_back(LB.getPointer());
7735           Size = CGF.Builder.CreatePtrDiff(
7736               CGF.EmitCastToVoidPtr(
7737                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7738               CGF.EmitCastToVoidPtr(LB.getPointer()));
7739           Sizes.push_back(
7740               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7741           Types.push_back(Flags);
7742           break;
7743         }
7744         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7745         if (!IsMemberPointer) {
7746           BasePointers.push_back(BP.getPointer());
7747           Pointers.push_back(LB.getPointer());
7748           Sizes.push_back(
7749               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7750 
7751           // We need to add a pointer flag for each map that comes from the
7752           // same expression except for the first one. We also need to signal
7753           // this map is the first one that relates with the current capture
7754           // (there is a set of entries for each capture).
7755           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7756               MapType, MapModifiers, IsImplicit,
7757               !IsExpressionFirstInfo || RequiresReference,
7758               IsCaptureFirstInfo && !RequiresReference);
7759 
7760           if (!IsExpressionFirstInfo) {
7761             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7762             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7763             if (IsPointer)
7764               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7765                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7766 
7767             if (ShouldBeMemberOf) {
7768               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7769               // should be later updated with the correct value of MEMBER_OF.
7770               Flags |= OMP_MAP_MEMBER_OF;
7771               // From now on, all subsequent PTR_AND_OBJ entries should not be
7772               // marked as MEMBER_OF.
7773               ShouldBeMemberOf = false;
7774             }
7775           }
7776 
7777           Types.push_back(Flags);
7778         }
7779 
7780         // If we have encountered a member expression so far, keep track of the
7781         // mapped member. If the parent is "*this", then the value declaration
7782         // is nullptr.
7783         if (EncounteredME) {
7784           const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
7785           unsigned FieldIndex = FD->getFieldIndex();
7786 
7787           // Update info about the lowest and highest elements for this struct
7788           if (!PartialStruct.Base.isValid()) {
7789             PartialStruct.LowestElem = {FieldIndex, LB};
7790             PartialStruct.HighestElem = {FieldIndex, LB};
7791             PartialStruct.Base = BP;
7792           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7793             PartialStruct.LowestElem = {FieldIndex, LB};
7794           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7795             PartialStruct.HighestElem = {FieldIndex, LB};
7796           }
7797         }
7798 
7799         // If we have a final array section, we are done with this expression.
7800         if (IsFinalArraySection)
7801           break;
7802 
7803         // The pointer becomes the base for the next element.
7804         if (Next != CE)
7805           BP = LB;
7806 
7807         IsExpressionFirstInfo = false;
7808         IsCaptureFirstInfo = false;
7809       }
7810     }
7811   }
7812 
7813   /// Return the adjusted map modifiers if the declaration a capture refers to
7814   /// appears in a first-private clause. This is expected to be used only with
7815   /// directives that start with 'target'.
7816   MappableExprsHandler::OpenMPOffloadMappingFlags
7817   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7818     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7819 
7820     // A first private variable captured by reference will use only the
7821     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7822     // declaration is known as first-private in this handler.
7823     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7824       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7825           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7826         return MappableExprsHandler::OMP_MAP_ALWAYS |
7827                MappableExprsHandler::OMP_MAP_TO;
7828       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7829         return MappableExprsHandler::OMP_MAP_TO |
7830                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7831       return MappableExprsHandler::OMP_MAP_PRIVATE |
7832              MappableExprsHandler::OMP_MAP_TO;
7833     }
7834     return MappableExprsHandler::OMP_MAP_TO |
7835            MappableExprsHandler::OMP_MAP_FROM;
7836   }
7837 
7838   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7839     // Rotate by getFlagMemberOffset() bits.
7840     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7841                                                   << getFlagMemberOffset());
7842   }
7843 
7844   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7845                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7846     // If the entry is PTR_AND_OBJ but has not been marked with the special
7847     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7848     // marked as MEMBER_OF.
7849     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7850         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7851       return;
7852 
7853     // Reset the placeholder value to prepare the flag for the assignment of the
7854     // proper MEMBER_OF value.
7855     Flags &= ~OMP_MAP_MEMBER_OF;
7856     Flags |= MemberOfFlag;
7857   }
7858 
7859   void getPlainLayout(const CXXRecordDecl *RD,
7860                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7861                       bool AsBase) const {
7862     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7863 
7864     llvm::StructType *St =
7865         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7866 
7867     unsigned NumElements = St->getNumElements();
7868     llvm::SmallVector<
7869         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7870         RecordLayout(NumElements);
7871 
7872     // Fill bases.
7873     for (const auto &I : RD->bases()) {
7874       if (I.isVirtual())
7875         continue;
7876       const auto *Base = I.getType()->getAsCXXRecordDecl();
7877       // Ignore empty bases.
7878       if (Base->isEmpty() || CGF.getContext()
7879                                  .getASTRecordLayout(Base)
7880                                  .getNonVirtualSize()
7881                                  .isZero())
7882         continue;
7883 
7884       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7885       RecordLayout[FieldIndex] = Base;
7886     }
7887     // Fill in virtual bases.
7888     for (const auto &I : RD->vbases()) {
7889       const auto *Base = I.getType()->getAsCXXRecordDecl();
7890       // Ignore empty bases.
7891       if (Base->isEmpty())
7892         continue;
7893       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7894       if (RecordLayout[FieldIndex])
7895         continue;
7896       RecordLayout[FieldIndex] = Base;
7897     }
7898     // Fill in all the fields.
7899     assert(!RD->isUnion() && "Unexpected union.");
7900     for (const auto *Field : RD->fields()) {
7901       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7902       // will fill in later.)
7903       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7904         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7905         RecordLayout[FieldIndex] = Field;
7906       }
7907     }
7908     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7909              &Data : RecordLayout) {
7910       if (Data.isNull())
7911         continue;
7912       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7913         getPlainLayout(Base, Layout, /*AsBase=*/true);
7914       else
7915         Layout.push_back(Data.get<const FieldDecl *>());
7916     }
7917   }
7918 
7919 public:
7920   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7921       : CurDir(&Dir), CGF(CGF) {
7922     // Extract firstprivate clause information.
7923     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7924       for (const auto *D : C->varlists())
7925         FirstPrivateDecls.try_emplace(
7926             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7927     // Extract device pointer clause information.
7928     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7929       for (auto L : C->component_lists())
7930         DevPointersMap[L.first].push_back(L.second);
7931   }
7932 
  /// Constructor for the declare mapper directive. Only records \p Dir and
  /// \p CGF; unlike the executable-directive constructor, no firstprivate or
  /// is_device_ptr clause information is collected here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
7936 
7937   /// Generate code for the combined entry if we have a partially mapped struct
7938   /// and take care of the mapping flags of the arguments corresponding to
7939   /// individual struct members.
7940   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7941                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7942                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7943                          const StructRangeInfoTy &PartialStruct) const {
7944     // Base is the base of the struct
7945     BasePointers.push_back(PartialStruct.Base.getPointer());
7946     // Pointer is the address of the lowest element
7947     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7948     Pointers.push_back(LB);
7949     // Size is (addr of {highest+1} element) - (addr of lowest element)
7950     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7951     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7952     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7953     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7954     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7955     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7956                                                   /*isSigned=*/false);
7957     Sizes.push_back(Size);
7958     // Map type is always TARGET_PARAM
7959     Types.push_back(OMP_MAP_TARGET_PARAM);
7960     // Remove TARGET_PARAM flag from the first element
7961     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7962 
7963     // All other current entries will be MEMBER_OF the combined entry
7964     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7965     // 0xFFFF in the MEMBER_OF field).
7966     OpenMPOffloadMappingFlags MemberOfFlag =
7967         getMemberOfFlag(BasePointers.size() - 1);
7968     for (auto &M : CurTypes)
7969       setCorrectMemberOfFlag(M, MemberOfFlag);
7970   }
7971 
  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, the relevant declaration is recorded on
  /// the corresponding base pointer entry.
  ///
  /// The component lists of the map, to, from and use_device_ptr clauses of
  /// the current executable directive are processed. All results are appended
  /// to \a BasePointers, \a Pointers, \a Sizes and \a Types.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. The key is the canonical declaration, or null when no
    // declaration is given.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect the component lists of the map clauses with their own map type
    // and modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    // 'to' clauses are recorded as maps of type 'to' without modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    // 'from' clauses are recorded as maps of type 'from' without modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        // The last component of the list provides the declaration and the
        // expression used for the lookup below.
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD);
        } else {
          // Not a struct member: emit the zero-size entry right away, using
          // the loaded pointer value both as base pointer and as pointer.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    // Generate the entries declaration by declaration, in the order in which
    // the declarations were first inserted into Info.
    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                     CurBasePointers, CurPointers, CurSizes,
                                     CurTypes, PartialStruct,
                                     IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          // The flag goes on the first entry generated for this list.
          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          // The base pointer is the address of the member itself, the pointer
          // is the value loaded from it.
          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                             OMP_MAP_MEMBER_OF);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }
8150 
8151   /// Generate all the base pointers, section pointers, sizes and map types for
8152   /// the extracted map clauses of user-defined mapper.
8153   void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8154                                 MapValuesArrayTy &Pointers,
8155                                 MapValuesArrayTy &Sizes,
8156                                 MapFlagsArrayTy &Types) const {
8157     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8158            "Expect a declare mapper directive");
8159     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8160     // We have to process the component lists that relate with the same
8161     // declaration in a single chunk so that we can generate the map flags
8162     // correctly. Therefore, we organize all lists in a map.
8163     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8164 
8165     // Helper function to fill the information map for the different supported
8166     // clauses.
8167     auto &&InfoGen = [&Info](
8168         const ValueDecl *D,
8169         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8170         OpenMPMapClauseKind MapType,
8171         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8172         bool ReturnDevicePointer, bool IsImplicit) {
8173       const ValueDecl *VD =
8174           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8175       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8176                             IsImplicit);
8177     };
8178 
8179     for (const auto *C : CurMapperDir->clauselists()) {
8180       const auto *MC = cast<OMPMapClause>(C);
8181       for (const auto L : MC->component_lists()) {
8182         InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8183                 /*ReturnDevicePointer=*/false, MC->isImplicit());
8184       }
8185     }
8186 
8187     for (const auto &M : Info) {
8188       // We need to know when we generate information for the first component
8189       // associated with a capture, because the mapping flags depend on it.
8190       bool IsFirstComponentList = true;
8191 
8192       // Temporary versions of arrays
8193       MapBaseValuesArrayTy CurBasePointers;
8194       MapValuesArrayTy CurPointers;
8195       MapValuesArrayTy CurSizes;
8196       MapFlagsArrayTy CurTypes;
8197       StructRangeInfoTy PartialStruct;
8198 
8199       for (const MapInfo &L : M.second) {
8200         assert(!L.Components.empty() &&
8201                "Not expecting declaration with no component lists.");
8202         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8203                                      CurBasePointers, CurPointers, CurSizes,
8204                                      CurTypes, PartialStruct,
8205                                      IsFirstComponentList, L.IsImplicit);
8206         IsFirstComponentList = false;
8207       }
8208 
8209       // If there is an entry in PartialStruct it means we have a struct with
8210       // individual members mapped. Emit an extra combined entry.
8211       if (PartialStruct.Base.isValid())
8212         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8213                           PartialStruct);
8214 
8215       // We need to append the results of this capture to what we already have.
8216       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8217       Pointers.append(CurPointers.begin(), CurPointers.end());
8218       Sizes.append(CurSizes.begin(), CurSizes.end());
8219       Types.append(CurTypes.begin(), CurTypes.end());
8220     }
8221   }
8222 
8223   /// Emit capture info for lambdas for variables captured by reference.
8224   void generateInfoForLambdaCaptures(
8225       const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8226       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8227       MapFlagsArrayTy &Types,
8228       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8229     const auto *RD = VD->getType()
8230                          .getCanonicalType()
8231                          .getNonReferenceType()
8232                          ->getAsCXXRecordDecl();
8233     if (!RD || !RD->isLambda())
8234       return;
8235     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8236     LValue VDLVal = CGF.MakeAddrLValue(
8237         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8238     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8239     FieldDecl *ThisCapture = nullptr;
8240     RD->getCaptureFields(Captures, ThisCapture);
8241     if (ThisCapture) {
8242       LValue ThisLVal =
8243           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8244       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8245       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8246                                  VDLVal.getPointer(CGF));
8247       BasePointers.push_back(ThisLVal.getPointer(CGF));
8248       Pointers.push_back(ThisLValVal.getPointer(CGF));
8249       Sizes.push_back(
8250           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8251                                     CGF.Int64Ty, /*isSigned=*/true));
8252       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8253                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8254     }
8255     for (const LambdaCapture &LC : RD->captures()) {
8256       if (!LC.capturesVariable())
8257         continue;
8258       const VarDecl *VD = LC.getCapturedVar();
8259       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8260         continue;
8261       auto It = Captures.find(VD);
8262       assert(It != Captures.end() && "Found lambda capture without field.");
8263       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8264       if (LC.getCaptureKind() == LCK_ByRef) {
8265         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8266         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8267                                    VDLVal.getPointer(CGF));
8268         BasePointers.push_back(VarLVal.getPointer(CGF));
8269         Pointers.push_back(VarLValVal.getPointer(CGF));
8270         Sizes.push_back(CGF.Builder.CreateIntCast(
8271             CGF.getTypeSize(
8272                 VD->getType().getCanonicalType().getNonReferenceType()),
8273             CGF.Int64Ty, /*isSigned=*/true));
8274       } else {
8275         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8276         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8277                                    VDLVal.getPointer(CGF));
8278         BasePointers.push_back(VarLVal.getPointer(CGF));
8279         Pointers.push_back(VarRVal.getScalarVal());
8280         Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8281       }
8282       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8283                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8284     }
8285   }
8286 
8287   /// Set correct indices for lambdas captures.
8288   void adjustMemberOfForLambdaCaptures(
8289       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8290       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8291       MapFlagsArrayTy &Types) const {
8292     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8293       // Set correct member_of idx for all implicit lambda captures.
8294       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8295                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8296         continue;
8297       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8298       assert(BasePtr && "Unable to find base lambda address.");
8299       int TgtIdx = -1;
8300       for (unsigned J = I; J > 0; --J) {
8301         unsigned Idx = J - 1;
8302         if (Pointers[Idx] != BasePtr)
8303           continue;
8304         TgtIdx = Idx;
8305         break;
8306       }
8307       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8308       // All other current entries will be MEMBER_OF the combined entry
8309       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8310       // 0xFFFF in the MEMBER_OF field).
8311       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8312       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8313     }
8314   }
8315 
8316   /// Generate the base pointers, section pointers, sizes and map types
8317   /// associated to a given capture.
8318   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8319                               llvm::Value *Arg,
8320                               MapBaseValuesArrayTy &BasePointers,
8321                               MapValuesArrayTy &Pointers,
8322                               MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
8323                               StructRangeInfoTy &PartialStruct) const {
8324     assert(!Cap->capturesVariableArrayType() &&
8325            "Not expecting to generate map info for a variable array type!");
8326 
8327     // We need to know when we generating information for the first component
8328     const ValueDecl *VD = Cap->capturesThis()
8329                               ? nullptr
8330                               : Cap->getCapturedVar()->getCanonicalDecl();
8331 
8332     // If this declaration appears in a is_device_ptr clause we just have to
8333     // pass the pointer by value. If it is a reference to a declaration, we just
8334     // pass its value.
8335     if (DevPointersMap.count(VD)) {
8336       BasePointers.emplace_back(Arg, VD);
8337       Pointers.push_back(Arg);
8338       Sizes.push_back(
8339           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8340                                     CGF.Int64Ty, /*isSigned=*/true));
8341       Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
8342       return;
8343     }
8344 
8345     using MapData =
8346         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8347                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
8348     SmallVector<MapData, 4> DeclComponentLists;
8349     assert(CurDir.is<const OMPExecutableDirective *>() &&
8350            "Expect a executable directive");
8351     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8352     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8353       for (const auto L : C->decl_component_lists(VD)) {
8354         assert(L.first == VD &&
8355                "We got information for the wrong declaration??");
8356         assert(!L.second.empty() &&
8357                "Not expecting declaration with no component lists.");
8358         DeclComponentLists.emplace_back(L.second, C->getMapType(),
8359                                         C->getMapTypeModifiers(),
8360                                         C->isImplicit());
8361       }
8362     }
8363 
8364     // Find overlapping elements (including the offset from the base element).
8365     llvm::SmallDenseMap<
8366         const MapData *,
8367         llvm::SmallVector<
8368             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8369         4>
8370         OverlappedData;
8371     size_t Count = 0;
8372     for (const MapData &L : DeclComponentLists) {
8373       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8374       OpenMPMapClauseKind MapType;
8375       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8376       bool IsImplicit;
8377       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8378       ++Count;
8379       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8380         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8381         std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
8382         auto CI = Components.rbegin();
8383         auto CE = Components.rend();
8384         auto SI = Components1.rbegin();
8385         auto SE = Components1.rend();
8386         for (; CI != CE && SI != SE; ++CI, ++SI) {
8387           if (CI->getAssociatedExpression()->getStmtClass() !=
8388               SI->getAssociatedExpression()->getStmtClass())
8389             break;
8390           // Are we dealing with different variables/fields?
8391           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8392             break;
8393         }
8394         // Found overlapping if, at least for one component, reached the head of
8395         // the components list.
8396         if (CI == CE || SI == SE) {
8397           assert((CI != CE || SI != SE) &&
8398                  "Unexpected full match of the mapping components.");
8399           const MapData &BaseData = CI == CE ? L : L1;
8400           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8401               SI == SE ? Components : Components1;
8402           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8403           OverlappedElements.getSecond().push_back(SubData);
8404         }
8405       }
8406     }
8407     // Sort the overlapped elements for each item.
8408     llvm::SmallVector<const FieldDecl *, 4> Layout;
8409     if (!OverlappedData.empty()) {
8410       if (const auto *CRD =
8411               VD->getType().getCanonicalType()->getAsCXXRecordDecl())
8412         getPlainLayout(CRD, Layout, /*AsBase=*/false);
8413       else {
8414         const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
8415         Layout.append(RD->field_begin(), RD->field_end());
8416       }
8417     }
8418     for (auto &Pair : OverlappedData) {
8419       llvm::sort(
8420           Pair.getSecond(),
8421           [&Layout](
8422               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8423               OMPClauseMappableExprCommon::MappableExprComponentListRef
8424                   Second) {
8425             auto CI = First.rbegin();
8426             auto CE = First.rend();
8427             auto SI = Second.rbegin();
8428             auto SE = Second.rend();
8429             for (; CI != CE && SI != SE; ++CI, ++SI) {
8430               if (CI->getAssociatedExpression()->getStmtClass() !=
8431                   SI->getAssociatedExpression()->getStmtClass())
8432                 break;
8433               // Are we dealing with different variables/fields?
8434               if (CI->getAssociatedDeclaration() !=
8435                   SI->getAssociatedDeclaration())
8436                 break;
8437             }
8438 
8439             // Lists contain the same elements.
8440             if (CI == CE && SI == SE)
8441               return false;
8442 
8443             // List with less elements is less than list with more elements.
8444             if (CI == CE || SI == SE)
8445               return CI == CE;
8446 
8447             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8448             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8449             if (FD1->getParent() == FD2->getParent())
8450               return FD1->getFieldIndex() < FD2->getFieldIndex();
8451             const auto It =
8452                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8453                   return FD == FD1 || FD == FD2;
8454                 });
8455             return *It == FD1;
8456           });
8457     }
8458 
8459     // Associated with a capture, because the mapping flags depend on it.
8460     // Go through all of the elements with the overlapped elements.
8461     for (const auto &Pair : OverlappedData) {
8462       const MapData &L = *Pair.getFirst();
8463       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8464       OpenMPMapClauseKind MapType;
8465       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8466       bool IsImplicit;
8467       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8468       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8469           OverlappedComponents = Pair.getSecond();
8470       bool IsFirstComponentList = true;
8471       generateInfoForComponentList(MapType, MapModifiers, Components,
8472                                    BasePointers, Pointers, Sizes, Types,
8473                                    PartialStruct, IsFirstComponentList,
8474                                    IsImplicit, OverlappedComponents);
8475     }
8476     // Go through other elements without overlapped elements.
8477     bool IsFirstComponentList = OverlappedData.empty();
8478     for (const MapData &L : DeclComponentLists) {
8479       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8480       OpenMPMapClauseKind MapType;
8481       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8482       bool IsImplicit;
8483       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8484       auto It = OverlappedData.find(&L);
8485       if (It == OverlappedData.end())
8486         generateInfoForComponentList(MapType, MapModifiers, Components,
8487                                      BasePointers, Pointers, Sizes, Types,
8488                                      PartialStruct, IsFirstComponentList,
8489                                      IsImplicit);
8490       IsFirstComponentList = false;
8491     }
8492   }
8493 
8494   /// Generate the base pointers, section pointers, sizes and map types
8495   /// associated with the declare target link variables.
8496   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8497                                         MapValuesArrayTy &Pointers,
8498                                         MapValuesArrayTy &Sizes,
8499                                         MapFlagsArrayTy &Types) const {
8500     assert(CurDir.is<const OMPExecutableDirective *>() &&
8501            "Expect a executable directive");
8502     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8503     // Map other list items in the map clause which are not captured variables
8504     // but "declare target link" global variables.
8505     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8506       for (const auto L : C->component_lists()) {
8507         if (!L.first)
8508           continue;
8509         const auto *VD = dyn_cast<VarDecl>(L.first);
8510         if (!VD)
8511           continue;
8512         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8513             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8514         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8515             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8516           continue;
8517         StructRangeInfoTy PartialStruct;
8518         generateInfoForComponentList(
8519             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8520             Pointers, Sizes, Types, PartialStruct,
8521             /*IsFirstComponentList=*/true, C->isImplicit());
8522         assert(!PartialStruct.Base.isValid() &&
8523                "No partial structs for declare target link expected.");
8524       }
8525     }
8526   }
8527 
8528   /// Generate the default map information for a given capture \a CI,
8529   /// record field declaration \a RI and captured value \a CV.
8530   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8531                               const FieldDecl &RI, llvm::Value *CV,
8532                               MapBaseValuesArrayTy &CurBasePointers,
8533                               MapValuesArrayTy &CurPointers,
8534                               MapValuesArrayTy &CurSizes,
8535                               MapFlagsArrayTy &CurMapTypes) const {
8536     bool IsImplicit = true;
8537     // Do the default mapping.
8538     if (CI.capturesThis()) {
8539       CurBasePointers.push_back(CV);
8540       CurPointers.push_back(CV);
8541       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8542       CurSizes.push_back(
8543           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8544                                     CGF.Int64Ty, /*isSigned=*/true));
8545       // Default map type.
8546       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8547     } else if (CI.capturesVariableByCopy()) {
8548       CurBasePointers.push_back(CV);
8549       CurPointers.push_back(CV);
8550       if (!RI.getType()->isAnyPointerType()) {
8551         // We have to signal to the runtime captures passed by value that are
8552         // not pointers.
8553         CurMapTypes.push_back(OMP_MAP_LITERAL);
8554         CurSizes.push_back(CGF.Builder.CreateIntCast(
8555             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8556       } else {
8557         // Pointers are implicitly mapped with a zero size and no flags
8558         // (other than first map that is added for all implicit maps).
8559         CurMapTypes.push_back(OMP_MAP_NONE);
8560         CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8561       }
8562       const VarDecl *VD = CI.getCapturedVar();
8563       auto I = FirstPrivateDecls.find(VD);
8564       if (I != FirstPrivateDecls.end())
8565         IsImplicit = I->getSecond();
8566     } else {
8567       assert(CI.capturesVariable() && "Expected captured reference.");
8568       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8569       QualType ElementType = PtrTy->getPointeeType();
8570       CurSizes.push_back(CGF.Builder.CreateIntCast(
8571           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8572       // The default map type for a scalar/complex type is 'to' because by
8573       // default the value doesn't have to be retrieved. For an aggregate
8574       // type, the default is 'tofrom'.
8575       CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
8576       const VarDecl *VD = CI.getCapturedVar();
8577       auto I = FirstPrivateDecls.find(VD);
8578       if (I != FirstPrivateDecls.end() &&
8579           VD->getType().isConstant(CGF.getContext())) {
8580         llvm::Constant *Addr =
8581             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8582         // Copy the value of the original variable to the new global copy.
8583         CGF.Builder.CreateMemCpy(
8584             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
8585             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8586             CurSizes.back(), /*IsVolatile=*/false);
8587         // Use new global variable as the base pointers.
8588         CurBasePointers.push_back(Addr);
8589         CurPointers.push_back(Addr);
8590       } else {
8591         CurBasePointers.push_back(CV);
8592         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8593           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8594               CV, ElementType, CGF.getContext().getDeclAlign(VD),
8595               AlignmentSource::Decl));
8596           CurPointers.push_back(PtrAddr.getPointer());
8597         } else {
8598           CurPointers.push_back(CV);
8599         }
8600       }
8601       if (I != FirstPrivateDecls.end())
8602         IsImplicit = I->getSecond();
8603     }
8604     // Every default map produces a single argument which is a target parameter.
8605     CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
8606 
8607     // Add flag stating this is an implicit map.
8608     if (IsImplicit)
8609       CurMapTypes.back() |= OMP_MAP_IMPLICIT;
8610   }
8611 };
8612 } // anonymous namespace
8613 
8614 /// Emit the arrays used to pass the captures and map information to the
8615 /// offloading runtime library. If there is no map or capture information,
8616 /// return nullptr by reference.
8617 static void
8618 emitOffloadingArrays(CodeGenFunction &CGF,
8619                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
8620                      MappableExprsHandler::MapValuesArrayTy &Pointers,
8621                      MappableExprsHandler::MapValuesArrayTy &Sizes,
8622                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
8623                      CGOpenMPRuntime::TargetDataInfo &Info) {
8624   CodeGenModule &CGM = CGF.CGM;
8625   ASTContext &Ctx = CGF.getContext();
8626 
8627   // Reset the array information.
8628   Info.clearArrayInfo();
8629   Info.NumberOfPtrs = BasePointers.size();
8630 
8631   if (Info.NumberOfPtrs) {
8632     // Detect if we have any capture size requiring runtime evaluation of the
8633     // size so that a constant array could be eventually used.
8634     bool hasRuntimeEvaluationCaptureSize = false;
8635     for (llvm::Value *S : Sizes)
8636       if (!isa<llvm::Constant>(S)) {
8637         hasRuntimeEvaluationCaptureSize = true;
8638         break;
8639       }
8640 
8641     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8642     QualType PointerArrayType = Ctx.getConstantArrayType(
8643         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
8644         /*IndexTypeQuals=*/0);
8645 
8646     Info.BasePointersArray =
8647         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8648     Info.PointersArray =
8649         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8650 
8651     // If we don't have any VLA types or other types that require runtime
8652     // evaluation, we can use a constant array for the map sizes, otherwise we
8653     // need to fill up the arrays as we do for the pointers.
8654     QualType Int64Ty =
8655         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8656     if (hasRuntimeEvaluationCaptureSize) {
8657       QualType SizeArrayType = Ctx.getConstantArrayType(
8658           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
8659           /*IndexTypeQuals=*/0);
8660       Info.SizesArray =
8661           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8662     } else {
8663       // We expect all the sizes to be constant, so we collect them to create
8664       // a constant array.
8665       SmallVector<llvm::Constant *, 16> ConstSizes;
8666       for (llvm::Value *S : Sizes)
8667         ConstSizes.push_back(cast<llvm::Constant>(S));
8668 
8669       auto *SizesArrayInit = llvm::ConstantArray::get(
8670           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8671       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8672       auto *SizesArrayGbl = new llvm::GlobalVariable(
8673           CGM.getModule(), SizesArrayInit->getType(),
8674           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8675           SizesArrayInit, Name);
8676       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8677       Info.SizesArray = SizesArrayGbl;
8678     }
8679 
8680     // The map types are always constant so we don't need to generate code to
8681     // fill arrays. Instead, we create an array constant.
8682     SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8683     llvm::copy(MapTypes, Mapping.begin());
8684     llvm::Constant *MapTypesArrayInit =
8685         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8686     std::string MaptypesName =
8687         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8688     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8689         CGM.getModule(), MapTypesArrayInit->getType(),
8690         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8691         MapTypesArrayInit, MaptypesName);
8692     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8693     Info.MapTypesArray = MapTypesArrayGbl;
8694 
8695     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8696       llvm::Value *BPVal = *BasePointers[I];
8697       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8698           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8699           Info.BasePointersArray, 0, I);
8700       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8701           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8702       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8703       CGF.Builder.CreateStore(BPVal, BPAddr);
8704 
8705       if (Info.requiresDevicePointerInfo())
8706         if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8707           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8708 
8709       llvm::Value *PVal = Pointers[I];
8710       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8711           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8712           Info.PointersArray, 0, I);
8713       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8714           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8715       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8716       CGF.Builder.CreateStore(PVal, PAddr);
8717 
8718       if (hasRuntimeEvaluationCaptureSize) {
8719         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8720             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8721             Info.SizesArray,
8722             /*Idx0=*/0,
8723             /*Idx1=*/I);
8724         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8725         CGF.Builder.CreateStore(
8726             CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8727             SAddr);
8728       }
8729     }
8730   }
8731 }
8732 
8733 /// Emit the arguments to be passed to the runtime library based on the
8734 /// arrays of pointers, sizes and map types.
8735 static void emitOffloadingArraysArgument(
8736     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8737     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8738     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8739   CodeGenModule &CGM = CGF.CGM;
8740   if (Info.NumberOfPtrs) {
8741     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8742         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8743         Info.BasePointersArray,
8744         /*Idx0=*/0, /*Idx1=*/0);
8745     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8746         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8747         Info.PointersArray,
8748         /*Idx0=*/0,
8749         /*Idx1=*/0);
8750     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8751         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8752         /*Idx0=*/0, /*Idx1=*/0);
8753     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8754         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8755         Info.MapTypesArray,
8756         /*Idx0=*/0,
8757         /*Idx1=*/0);
8758   } else {
8759     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8760     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8761     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8762     MapTypesArrayArg =
8763         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8764   }
8765 }
8766 
8767 /// Check for inner distribute directive.
8768 static const OMPExecutableDirective *
8769 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8770   const auto *CS = D.getInnermostCapturedStmt();
8771   const auto *Body =
8772       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8773   const Stmt *ChildStmt =
8774       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8775 
8776   if (const auto *NestedDir =
8777           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8778     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8779     switch (D.getDirectiveKind()) {
8780     case OMPD_target:
8781       if (isOpenMPDistributeDirective(DKind))
8782         return NestedDir;
8783       if (DKind == OMPD_teams) {
8784         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8785             /*IgnoreCaptured=*/true);
8786         if (!Body)
8787           return nullptr;
8788         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8789         if (const auto *NND =
8790                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8791           DKind = NND->getDirectiveKind();
8792           if (isOpenMPDistributeDirective(DKind))
8793             return NND;
8794         }
8795       }
8796       return nullptr;
8797     case OMPD_target_teams:
8798       if (isOpenMPDistributeDirective(DKind))
8799         return NestedDir;
8800       return nullptr;
8801     case OMPD_target_parallel:
8802     case OMPD_target_simd:
8803     case OMPD_target_parallel_for:
8804     case OMPD_target_parallel_for_simd:
8805       return nullptr;
8806     case OMPD_target_teams_distribute:
8807     case OMPD_target_teams_distribute_simd:
8808     case OMPD_target_teams_distribute_parallel_for:
8809     case OMPD_target_teams_distribute_parallel_for_simd:
8810     case OMPD_parallel:
8811     case OMPD_for:
8812     case OMPD_parallel_for:
8813     case OMPD_parallel_master:
8814     case OMPD_parallel_sections:
8815     case OMPD_for_simd:
8816     case OMPD_parallel_for_simd:
8817     case OMPD_cancel:
8818     case OMPD_cancellation_point:
8819     case OMPD_ordered:
8820     case OMPD_threadprivate:
8821     case OMPD_allocate:
8822     case OMPD_task:
8823     case OMPD_simd:
8824     case OMPD_sections:
8825     case OMPD_section:
8826     case OMPD_single:
8827     case OMPD_master:
8828     case OMPD_critical:
8829     case OMPD_taskyield:
8830     case OMPD_barrier:
8831     case OMPD_taskwait:
8832     case OMPD_taskgroup:
8833     case OMPD_atomic:
8834     case OMPD_flush:
8835     case OMPD_teams:
8836     case OMPD_target_data:
8837     case OMPD_target_exit_data:
8838     case OMPD_target_enter_data:
8839     case OMPD_distribute:
8840     case OMPD_distribute_simd:
8841     case OMPD_distribute_parallel_for:
8842     case OMPD_distribute_parallel_for_simd:
8843     case OMPD_teams_distribute:
8844     case OMPD_teams_distribute_simd:
8845     case OMPD_teams_distribute_parallel_for:
8846     case OMPD_teams_distribute_parallel_for_simd:
8847     case OMPD_target_update:
8848     case OMPD_declare_simd:
8849     case OMPD_declare_variant:
8850     case OMPD_declare_target:
8851     case OMPD_end_declare_target:
8852     case OMPD_declare_reduction:
8853     case OMPD_declare_mapper:
8854     case OMPD_taskloop:
8855     case OMPD_taskloop_simd:
8856     case OMPD_master_taskloop:
8857     case OMPD_master_taskloop_simd:
8858     case OMPD_parallel_master_taskloop:
8859     case OMPD_parallel_master_taskloop_simd:
8860     case OMPD_requires:
8861     case OMPD_unknown:
8862       llvm_unreachable("Unexpected directive.");
8863     }
8864   }
8865 
8866   return nullptr;
8867 }
8868 
8869 /// Emit the user-defined mapper function. The code generation follows the
8870 /// pattern in the example below.
8871 /// \code
8872 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8873 ///                                           void *base, void *begin,
8874 ///                                           int64_t size, int64_t type) {
8875 ///   // Allocate space for an array section first.
8876 ///   if (size > 1 && !maptype.IsDelete)
8877 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8878 ///                                 size*sizeof(Ty), clearToFrom(type));
8879 ///   // Map members.
8880 ///   for (unsigned i = 0; i < size; i++) {
8881 ///     // For each component specified by this mapper:
8882 ///     for (auto c : all_components) {
8883 ///       if (c.hasMapper())
8884 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8885 ///                       c.arg_type);
8886 ///       else
8887 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
8888 ///                                     c.arg_begin, c.arg_size, c.arg_type);
8889 ///     }
8890 ///   }
8891 ///   // Delete the array section.
8892 ///   if (size > 1 && maptype.IsDelete)
8893 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8894 ///                                 size*sizeof(Ty), clearToFrom(type));
8895 /// }
8896 /// \endcode
8897 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
8898                                             CodeGenFunction *CGF) {
8899   if (UDMMap.count(D) > 0)
8900     return;
8901   ASTContext &C = CGM.getContext();
8902   QualType Ty = D->getType();
8903   QualType PtrTy = C.getPointerType(Ty).withRestrict();
8904   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
8905   auto *MapperVarDecl =
8906       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
8907   SourceLocation Loc = D->getLocation();
8908   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
8909 
8910   // Prepare mapper function arguments and attributes.
8911   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8912                               C.VoidPtrTy, ImplicitParamDecl::Other);
8913   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
8914                             ImplicitParamDecl::Other);
8915   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8916                              C.VoidPtrTy, ImplicitParamDecl::Other);
8917   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8918                             ImplicitParamDecl::Other);
8919   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8920                             ImplicitParamDecl::Other);
8921   FunctionArgList Args;
8922   Args.push_back(&HandleArg);
8923   Args.push_back(&BaseArg);
8924   Args.push_back(&BeginArg);
8925   Args.push_back(&SizeArg);
8926   Args.push_back(&TypeArg);
8927   const CGFunctionInfo &FnInfo =
8928       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
8929   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
8930   SmallString<64> TyStr;
8931   llvm::raw_svector_ostream Out(TyStr);
8932   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
8933   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
8934   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
8935                                     Name, &CGM.getModule());
8936   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
8937   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
8938   // Start the mapper function code generation.
8939   CodeGenFunction MapperCGF(CGM);
8940   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
8941   // Compute the starting and end addreses of array elements.
8942   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
8943       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
8944       C.getPointerType(Int64Ty), Loc);
8945   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
8946       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
8947       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
8948   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
8949   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
8950       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
8951       C.getPointerType(Int64Ty), Loc);
8952   // Prepare common arguments for array initiation and deletion.
8953   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
8954       MapperCGF.GetAddrOfLocalVar(&HandleArg),
8955       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8956   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
8957       MapperCGF.GetAddrOfLocalVar(&BaseArg),
8958       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8959   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
8960       MapperCGF.GetAddrOfLocalVar(&BeginArg),
8961       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8962 
8963   // Emit array initiation if this is an array section and \p MapType indicates
8964   // that memory allocation is required.
8965   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
8966   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
8967                              ElementSize, HeadBB, /*IsInit=*/true);
8968 
8969   // Emit a for loop to iterate through SizeArg of elements and map all of them.
8970 
8971   // Emit the loop header block.
8972   MapperCGF.EmitBlock(HeadBB);
8973   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
8974   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
8975   // Evaluate whether the initial condition is satisfied.
8976   llvm::Value *IsEmpty =
8977       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
8978   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
8979   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
8980 
8981   // Emit the loop body block.
8982   MapperCGF.EmitBlock(BodyBB);
8983   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
8984       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
8985   PtrPHI->addIncoming(PtrBegin, EntryBB);
8986   Address PtrCurrent =
8987       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
8988                           .getAlignment()
8989                           .alignmentOfArrayElement(ElementSize));
8990   // Privatize the declared variable of mapper to be the current array element.
8991   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
8992   Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
8993     return MapperCGF
8994         .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
8995         .getAddress(MapperCGF);
8996   });
8997   (void)Scope.Privatize();
8998 
8999   // Get map clause information. Fill up the arrays with all mapped variables.
9000   MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9001   MappableExprsHandler::MapValuesArrayTy Pointers;
9002   MappableExprsHandler::MapValuesArrayTy Sizes;
9003   MappableExprsHandler::MapFlagsArrayTy MapTypes;
9004   MappableExprsHandler MEHandler(*D, MapperCGF);
9005   MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
9006 
9007   // Call the runtime API __tgt_mapper_num_components to get the number of
9008   // pre-existing components.
9009   llvm::Value *OffloadingArgs[] = {Handle};
9010   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9011       createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
9012   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9013       PreviousSize,
9014       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9015 
9016   // Fill up the runtime mapper handle for all components.
9017   for (unsigned I = 0; I < BasePointers.size(); ++I) {
9018     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9019         *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9020     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9021         Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9022     llvm::Value *CurSizeArg = Sizes[I];
9023 
9024     // Extract the MEMBER_OF field from the map type.
9025     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
9026     MapperCGF.EmitBlock(MemberBB);
9027     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
9028     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
9029         OriMapType,
9030         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
9031     llvm::BasicBlock *MemberCombineBB =
9032         MapperCGF.createBasicBlock("omp.member.combine");
9033     llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
9034     llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
9035     MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
9036     // Add the number of pre-existing components to the MEMBER_OF field if it
9037     // is valid.
9038     MapperCGF.EmitBlock(MemberCombineBB);
9039     llvm::Value *CombinedMember =
9040         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9041     // Do nothing if it is not a member of previous components.
9042     MapperCGF.EmitBlock(TypeBB);
9043     llvm::PHINode *MemberMapType =
9044         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
9045     MemberMapType->addIncoming(OriMapType, MemberBB);
9046     MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
9047 
9048     // Combine the map type inherited from user-defined mapper with that
9049     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9050     // bits of the \a MapType, which is the input argument of the mapper
9051     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9052     // bits of MemberMapType.
9053     // [OpenMP 5.0], 1.2.6. map-type decay.
9054     //        | alloc |  to   | from  | tofrom | release | delete
9055     // ----------------------------------------------------------
9056     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9057     // to     | alloc |  to   | alloc |   to   | release | delete
9058     // from   | alloc | alloc | from  |  from  | release | delete
9059     // tofrom | alloc |  to   | from  | tofrom | release | delete
9060     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9061         MapType,
9062         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9063                                    MappableExprsHandler::OMP_MAP_FROM));
9064     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9065     llvm::BasicBlock *AllocElseBB =
9066         MapperCGF.createBasicBlock("omp.type.alloc.else");
9067     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9068     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9069     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9070     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9071     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9072     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9073     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9074     MapperCGF.EmitBlock(AllocBB);
9075     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9076         MemberMapType,
9077         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9078                                      MappableExprsHandler::OMP_MAP_FROM)));
9079     MapperCGF.Builder.CreateBr(EndBB);
9080     MapperCGF.EmitBlock(AllocElseBB);
9081     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9082         LeftToFrom,
9083         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9084     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9085     // In case of to, clear OMP_MAP_FROM.
9086     MapperCGF.EmitBlock(ToBB);
9087     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9088         MemberMapType,
9089         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9090     MapperCGF.Builder.CreateBr(EndBB);
9091     MapperCGF.EmitBlock(ToElseBB);
9092     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9093         LeftToFrom,
9094         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9095     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9096     // In case of from, clear OMP_MAP_TO.
9097     MapperCGF.EmitBlock(FromBB);
9098     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9099         MemberMapType,
9100         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9101     // In case of tofrom, do nothing.
9102     MapperCGF.EmitBlock(EndBB);
9103     llvm::PHINode *CurMapType =
9104         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9105     CurMapType->addIncoming(AllocMapType, AllocBB);
9106     CurMapType->addIncoming(ToMapType, ToBB);
9107     CurMapType->addIncoming(FromMapType, FromBB);
9108     CurMapType->addIncoming(MemberMapType, ToElseBB);
9109 
9110     // TODO: call the corresponding mapper function if a user-defined mapper is
9111     // associated with this map clause.
9112     // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9113     // data structure.
9114     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9115                                      CurSizeArg, CurMapType};
9116     MapperCGF.EmitRuntimeCall(
9117         createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
9118         OffloadingArgs);
9119   }
9120 
9121   // Update the pointer to point to the next element that needs to be mapped,
9122   // and check whether we have mapped all elements.
9123   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9124       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9125   PtrPHI->addIncoming(PtrNext, BodyBB);
9126   llvm::Value *IsDone =
9127       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9128   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9129   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9130 
9131   MapperCGF.EmitBlock(ExitBB);
9132   // Emit array deletion if this is an array section and \p MapType indicates
9133   // that deletion is required.
9134   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9135                              ElementSize, DoneBB, /*IsInit=*/false);
9136 
9137   // Emit the function exit block.
9138   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9139   MapperCGF.FinishFunction();
9140   UDMMap.try_emplace(D, Fn);
9141   if (CGF) {
9142     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9143     Decls.second.push_back(D);
9144   }
9145 }
9146 
9147 /// Emit the array initialization or deletion portion for user-defined mapper
9148 /// code generation. First, it evaluates whether an array section is mapped and
9149 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9150 /// true, and \a MapType indicates to not delete this array, array
9151 /// initialization code is generated. If \a IsInit is false, and \a MapType
9152 /// indicates to not this array, array deletion code is generated.
9153 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9154     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9155     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9156     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9157   StringRef Prefix = IsInit ? ".init" : ".del";
9158 
9159   // Evaluate if this is an array section.
9160   llvm::BasicBlock *IsDeleteBB =
9161       MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete");
9162   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix);
9163   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9164       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9165   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9166 
9167   // Evaluate if we are going to delete this section.
9168   MapperCGF.EmitBlock(IsDeleteBB);
9169   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9170       MapType,
9171       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9172   llvm::Value *DeleteCond;
9173   if (IsInit) {
9174     DeleteCond = MapperCGF.Builder.CreateIsNull(
9175         DeleteBit, "omp.array" + Prefix + ".delete");
9176   } else {
9177     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9178         DeleteBit, "omp.array" + Prefix + ".delete");
9179   }
9180   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9181 
9182   MapperCGF.EmitBlock(BodyBB);
9183   // Get the array size by multiplying element size and element number (i.e., \p
9184   // Size).
9185   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9186       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9187   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9188   // memory allocation/deletion purpose only.
9189   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9190       MapType,
9191       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9192                                    MappableExprsHandler::OMP_MAP_FROM)));
9193   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9194   // data structure.
9195   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9196   MapperCGF.EmitRuntimeCall(
9197       createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
9198 }
9199 
9200 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9201     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9202     llvm::Value *DeviceID,
9203     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9204                                      const OMPLoopDirective &D)>
9205         SizeEmitter) {
9206   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9207   const OMPExecutableDirective *TD = &D;
9208   // Get nested teams distribute kind directive, if any.
9209   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9210     TD = getNestedDistributeDirective(CGM.getContext(), D);
9211   if (!TD)
9212     return;
9213   const auto *LD = cast<OMPLoopDirective>(TD);
9214   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9215                                                      PrePostActionTy &) {
9216     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9217       llvm::Value *Args[] = {DeviceID, NumIterations};
9218       CGF.EmitRuntimeCall(
9219           createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
9220     }
9221   };
9222   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9223 }
9224 
/// Emit the code that launches the target region of directive \p D.
///
/// \param CGF          Function in which the launch code is emitted.
/// \param D            Target directive being emitted.
/// \param OutlinedFn   Outlined function for the region; it is called
///                     directly whenever the region is executed without
///                     offloading or the offloading call reports a failure.
/// \param OutlinedFnID Identifier of the target region passed to the
///                     offloading runtime calls. When null, no offloading
///                     code is emitted and only the direct call is generated.
/// \param IfCond       Expression of the 'if' clause, or null.
/// \param Device       Expression of the 'device' clause, or null.
/// \param SizeEmitter  Callback forwarded to emitTargetNumIterationsCall().
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    const Expr *Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' clause makes the region go through
  // EmitOMPTargetTaskBasedDirective() below instead of being emitted inline.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Collect the values of the captured variables once, up front.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled in by TargetThenGen and read by
  // ThenGen; both lambdas capture them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    // A 'nowait' clause selects the *_nowait variants of the runtime entry
    // points below.
    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    // A non-null return value of the offloading call is treated as failure.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    // With an outer task the captured variables are regenerated using the
    // CodeGenFunction this lambda is invoked with.
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Builds the offloading arrays (base pointers, pointers, sizes, map types)
  // for all captures, publishes them through InputInfo/MapTypesArray and then
  // runs ThenGen either inside a target task or inlined.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk the captures, the fields of the captured record and the captured
    // values in lock step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Hand the emitted arrays over to ThenGen.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen inside a target task (with an empty
  // OMPTargetDataInfo) or inlined.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9502 
9503 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9504                                                     StringRef ParentName) {
9505   if (!S)
9506     return;
9507 
9508   // Codegen OMP target directives that offload compute to the device.
9509   bool RequiresDeviceCodegen =
9510       isa<OMPExecutableDirective>(S) &&
9511       isOpenMPTargetExecutionDirective(
9512           cast<OMPExecutableDirective>(S)->getDirectiveKind());
9513 
9514   if (RequiresDeviceCodegen) {
9515     const auto &E = *cast<OMPExecutableDirective>(S);
9516     unsigned DeviceID;
9517     unsigned FileID;
9518     unsigned Line;
9519     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
9520                              FileID, Line);
9521 
9522     // Is this a target region that should not be emitted as an entry point? If
9523     // so just signal we are done with this target region.
9524     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
9525                                                             ParentName, Line))
9526       return;
9527 
9528     switch (E.getDirectiveKind()) {
9529     case OMPD_target:
9530       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9531                                                    cast<OMPTargetDirective>(E));
9532       break;
9533     case OMPD_target_parallel:
9534       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9535           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9536       break;
9537     case OMPD_target_teams:
9538       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9539           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9540       break;
9541     case OMPD_target_teams_distribute:
9542       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9543           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9544       break;
9545     case OMPD_target_teams_distribute_simd:
9546       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9547           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9548       break;
9549     case OMPD_target_parallel_for:
9550       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9551           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9552       break;
9553     case OMPD_target_parallel_for_simd:
9554       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9555           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9556       break;
9557     case OMPD_target_simd:
9558       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9559           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9560       break;
9561     case OMPD_target_teams_distribute_parallel_for:
9562       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9563           CGM, ParentName,
9564           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9565       break;
9566     case OMPD_target_teams_distribute_parallel_for_simd:
9567       CodeGenFunction::
9568           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9569               CGM, ParentName,
9570               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9571       break;
9572     case OMPD_parallel:
9573     case OMPD_for:
9574     case OMPD_parallel_for:
9575     case OMPD_parallel_master:
9576     case OMPD_parallel_sections:
9577     case OMPD_for_simd:
9578     case OMPD_parallel_for_simd:
9579     case OMPD_cancel:
9580     case OMPD_cancellation_point:
9581     case OMPD_ordered:
9582     case OMPD_threadprivate:
9583     case OMPD_allocate:
9584     case OMPD_task:
9585     case OMPD_simd:
9586     case OMPD_sections:
9587     case OMPD_section:
9588     case OMPD_single:
9589     case OMPD_master:
9590     case OMPD_critical:
9591     case OMPD_taskyield:
9592     case OMPD_barrier:
9593     case OMPD_taskwait:
9594     case OMPD_taskgroup:
9595     case OMPD_atomic:
9596     case OMPD_flush:
9597     case OMPD_teams:
9598     case OMPD_target_data:
9599     case OMPD_target_exit_data:
9600     case OMPD_target_enter_data:
9601     case OMPD_distribute:
9602     case OMPD_distribute_simd:
9603     case OMPD_distribute_parallel_for:
9604     case OMPD_distribute_parallel_for_simd:
9605     case OMPD_teams_distribute:
9606     case OMPD_teams_distribute_simd:
9607     case OMPD_teams_distribute_parallel_for:
9608     case OMPD_teams_distribute_parallel_for_simd:
9609     case OMPD_target_update:
9610     case OMPD_declare_simd:
9611     case OMPD_declare_variant:
9612     case OMPD_declare_target:
9613     case OMPD_end_declare_target:
9614     case OMPD_declare_reduction:
9615     case OMPD_declare_mapper:
9616     case OMPD_taskloop:
9617     case OMPD_taskloop_simd:
9618     case OMPD_master_taskloop:
9619     case OMPD_master_taskloop_simd:
9620     case OMPD_parallel_master_taskloop:
9621     case OMPD_parallel_master_taskloop_simd:
9622     case OMPD_requires:
9623     case OMPD_unknown:
9624       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9625     }
9626     return;
9627   }
9628 
9629   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9630     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9631       return;
9632 
9633     scanForTargetRegionsFunctions(
9634         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9635     return;
9636   }
9637 
9638   // If this is a lambda function, look into its body.
9639   if (const auto *L = dyn_cast<LambdaExpr>(S))
9640     S = L->getBody();
9641 
9642   // Keep looking for target regions recursively.
9643   for (const Stmt *II : S->children())
9644     scanForTargetRegionsFunctions(II, ParentName);
9645 }
9646 
9647 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9648   // If emitting code for the host, we do not process FD here. Instead we do
9649   // the normal code generation.
9650   if (!CGM.getLangOpts().OpenMPIsDevice) {
9651     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9652       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9653           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9654       // Do not emit device_type(nohost) functions for the host.
9655       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9656         return true;
9657     }
9658     return false;
9659   }
9660 
9661   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9662   StringRef Name = CGM.getMangledName(GD);
9663   // Try to detect target regions in the function.
9664   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9665     scanForTargetRegionsFunctions(FD->getBody(), Name);
9666     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9667         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9668     // Do not emit device_type(nohost) functions for the host.
9669     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9670       return true;
9671   }
9672 
9673   // Do not to emit function if it is not marked as declare target.
9674   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9675          AlreadyEmittedTargetFunctions.count(Name) == 0;
9676 }
9677 
9678 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9679   if (!CGM.getLangOpts().OpenMPIsDevice)
9680     return false;
9681 
9682   // Check if there are Ctors/Dtors in this declaration and look for target
9683   // regions in it. We use the complete variant to produce the kernel name
9684   // mangling.
9685   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9686   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9687     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9688       StringRef ParentName =
9689           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9690       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9691     }
9692     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9693       StringRef ParentName =
9694           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9695       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9696     }
9697   }
9698 
9699   // Do not to emit variable if it is not marked as declare target.
9700   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9701       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9702           cast<VarDecl>(GD.getDecl()));
9703   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9704       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9705        HasRequiresUnifiedSharedMemory)) {
9706     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9707     return true;
9708   }
9709   return false;
9710 }
9711 
9712 llvm::Constant *
9713 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9714                                                 const VarDecl *VD) {
9715   assert(VD->getType().isConstant(CGM.getContext()) &&
9716          "Expected constant variable.");
9717   StringRef VarName;
9718   llvm::Constant *Addr;
9719   llvm::GlobalValue::LinkageTypes Linkage;
9720   QualType Ty = VD->getType();
9721   SmallString<128> Buffer;
9722   {
9723     unsigned DeviceID;
9724     unsigned FileID;
9725     unsigned Line;
9726     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9727                              FileID, Line);
9728     llvm::raw_svector_ostream OS(Buffer);
9729     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9730        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9731     VarName = OS.str();
9732   }
9733   Linkage = llvm::GlobalValue::InternalLinkage;
9734   Addr =
9735       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9736                                   getDefaultFirstprivateAddressSpace());
9737   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9738   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9739   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9740   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9741       VarName, Addr, VarSize,
9742       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9743   return Addr;
9744 }
9745 
9746 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9747                                                    llvm::Constant *Addr) {
9748   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9749       !CGM.getLangOpts().OpenMPIsDevice)
9750     return;
9751   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9752       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9753   if (!Res) {
9754     if (CGM.getLangOpts().OpenMPIsDevice) {
9755       // Register non-target variables being emitted in device code (debug info
9756       // may cause this).
9757       StringRef VarName = CGM.getMangledName(VD);
9758       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9759     }
9760     return;
9761   }
9762   // Register declare target variables.
9763   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9764   StringRef VarName;
9765   CharUnits VarSize;
9766   llvm::GlobalValue::LinkageTypes Linkage;
9767 
9768   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9769       !HasRequiresUnifiedSharedMemory) {
9770     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9771     VarName = CGM.getMangledName(VD);
9772     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9773       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9774       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9775     } else {
9776       VarSize = CharUnits::Zero();
9777     }
9778     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9779     // Temp solution to prevent optimizations of the internal variables.
9780     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9781       std::string RefName = getName({VarName, "ref"});
9782       if (!CGM.GetGlobalValue(RefName)) {
9783         llvm::Constant *AddrRef =
9784             getOrCreateInternalVariable(Addr->getType(), RefName);
9785         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9786         GVAddrRef->setConstant(/*Val=*/true);
9787         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9788         GVAddrRef->setInitializer(Addr);
9789         CGM.addCompilerUsedGlobal(GVAddrRef);
9790       }
9791     }
9792   } else {
9793     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9794             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9795              HasRequiresUnifiedSharedMemory)) &&
9796            "Declare target attribute must link or to with unified memory.");
9797     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9798       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9799     else
9800       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9801 
9802     if (CGM.getLangOpts().OpenMPIsDevice) {
9803       VarName = Addr->getName();
9804       Addr = nullptr;
9805     } else {
9806       VarName = getAddrOfDeclareTargetVar(VD).getName();
9807       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9808     }
9809     VarSize = CGM.getPointerSize();
9810     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9811   }
9812 
9813   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9814       VarName, Addr, VarSize, Flags, Linkage);
9815 }
9816 
9817 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9818   if (isa<FunctionDecl>(GD.getDecl()) ||
9819       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9820     return emitTargetFunctions(GD);
9821 
9822   return emitTargetGlobalVariable(GD);
9823 }
9824 
9825 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9826   for (const VarDecl *VD : DeferredGlobalVariables) {
9827     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9828         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9829     if (!Res)
9830       continue;
9831     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9832         !HasRequiresUnifiedSharedMemory) {
9833       CGM.EmitGlobal(VD);
9834     } else {
9835       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9836               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9837                HasRequiresUnifiedSharedMemory)) &&
9838              "Expected link clause or to clause with unified memory.");
9839       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9840     }
9841   }
9842 }
9843 
/// This implementation performs no adjustment: it only asserts that \p D is a
/// target execution directive. \p CGF is unused here.
// NOTE(review): presumably a hook overridden by target-specific runtimes -
// confirm against the class declaration.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
9849 
9850 void CGOpenMPRuntime::checkArchForUnifiedAddressing(
9851     const OMPRequiresDecl *D) {
9852   for (const OMPClause *Clause : D->clauselists()) {
9853     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9854       HasRequiresUnifiedSharedMemory = true;
9855       break;
9856     }
9857   }
9858 }
9859 
9860 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9861                                                        LangAS &AS) {
9862   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9863     return false;
9864   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9865   switch(A->getAllocatorType()) {
9866   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9867   // Not supported, fallback to the default mem space.
9868   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9869   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9870   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9871   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9872   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9873   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9874   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9875     AS = LangAS::Default;
9876     return true;
9877   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9878     llvm_unreachable("Expected predefined allocator for the variables with the "
9879                      "static storage.");
9880   }
9881   return false;
9882 }
9883 
/// Return the current value of the HasRequiresUnifiedSharedMemory flag, which
/// checkArchForUnifiedAddressing() sets when it sees a unified_shared_memory
/// clause.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
9887 
9888 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9889     CodeGenModule &CGM)
9890     : CGM(CGM) {
9891   if (CGM.getLangOpts().OpenMPIsDevice) {
9892     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9893     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9894   }
9895 }
9896 
9897 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9898   if (CGM.getLangOpts().OpenMPIsDevice)
9899     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9900 }
9901 
9902 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9903   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9904     return true;
9905 
9906   StringRef Name = CGM.getMangledName(GD);
9907   const auto *D = cast<FunctionDecl>(GD.getDecl());
9908   // Do not to emit function if it is marked as declare target as it was already
9909   // emitted.
9910   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9911     if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
9912       if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
9913         return !F->isDeclaration();
9914       return false;
9915     }
9916     return true;
9917   }
9918 
9919   return !AlreadyEmittedTargetFunctions.insert(Name).second;
9920 }
9921 
9922 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
9923   // If we don't have entries or if we are emitting code for the device, we
9924   // don't need to do anything.
9925   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
9926       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
9927       (OffloadEntriesInfoManager.empty() &&
9928        !HasEmittedDeclareTargetRegion &&
9929        !HasEmittedTargetRegion))
9930     return nullptr;
9931 
9932   // Create and register the function that handles the requires directives.
9933   ASTContext &C = CGM.getContext();
9934 
9935   llvm::Function *RequiresRegFn;
9936   {
9937     CodeGenFunction CGF(CGM);
9938     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
9939     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
9940     std::string ReqName = getName({"omp_offloading", "requires_reg"});
9941     RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
9942     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
9943     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
9944     // TODO: check for other requires clauses.
9945     // The requires directive takes effect only when a target region is
9946     // present in the compilation unit. Otherwise it is ignored and not
9947     // passed to the runtime. This avoids the runtime from throwing an error
9948     // for mismatching requires clauses across compilation units that don't
9949     // contain at least 1 target region.
9950     assert((HasEmittedTargetRegion ||
9951             HasEmittedDeclareTargetRegion ||
9952             !OffloadEntriesInfoManager.empty()) &&
9953            "Target or declare target region expected.");
9954     if (HasRequiresUnifiedSharedMemory)
9955       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
9956     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
9957         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
9958     CGF.FinishFunction();
9959   }
9960   return RequiresRegFn;
9961 }
9962 
9963 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9964                                     const OMPExecutableDirective &D,
9965                                     SourceLocation Loc,
9966                                     llvm::Function *OutlinedFn,
9967                                     ArrayRef<llvm::Value *> CapturedVars) {
9968   if (!CGF.HaveInsertPoint())
9969     return;
9970 
9971   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9972   CodeGenFunction::RunCleanupsScope Scope(CGF);
9973 
9974   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9975   llvm::Value *Args[] = {
9976       RTLoc,
9977       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9978       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9979   llvm::SmallVector<llvm::Value *, 16> RealArgs;
9980   RealArgs.append(std::begin(Args), std::end(Args));
9981   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9982 
9983   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9984   CGF.EmitRuntimeCall(RTLFn, RealArgs);
9985 }
9986 
9987 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9988                                          const Expr *NumTeams,
9989                                          const Expr *ThreadLimit,
9990                                          SourceLocation Loc) {
9991   if (!CGF.HaveInsertPoint())
9992     return;
9993 
9994   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9995 
9996   llvm::Value *NumTeamsVal =
9997       NumTeams
9998           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9999                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10000           : CGF.Builder.getInt32(0);
10001 
10002   llvm::Value *ThreadLimitVal =
10003       ThreadLimit
10004           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10005                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10006           : CGF.Builder.getInt32(0);
10007 
10008   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10009   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10010                                      ThreadLimitVal};
10011   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
10012                       PushNumTeamsArgs);
10013 }
10014 
/// Emits a structured target data region: a call to
/// __tgt_target_data_begin, the region body produced by \p CodeGen, and a
/// matching call to __tgt_target_data_end.
///
/// \param CGF     Function being emitted; nothing is done if it has no
///                insertion point.
/// \param D       Directive whose map information fills the offloading
///                arrays.
/// \param IfCond  Optional 'if' clause condition. When present, the begin and
///                end sequences are each emitted through emitIfClause.
/// \param Device  Optional device expression, cast to a signed 64-bit value;
///                OMP_DEVICEID_UNDEF is passed when it is null.
/// \param CodeGen Generator for the body of the region.
/// \param Info    Filled by emitOffloadingArrays for the begin call and read
///                again when emitting the end call.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    // Rebuild the array arguments from Info, which was populated while
    // emitting the opening of the region.
    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Opening of the region: conditional on the 'if' clause when there is one,
  // unconditional otherwise.
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Closing of the region, guarded by the same condition as the opening.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10141 
/// Emits the runtime call for a standalone data-mapping directive: 'target
/// enter data', 'target exit data' or 'target update'.
///
/// \param CGF    Function being emitted; nothing is done if it has no
///               insertion point.
/// \param D      The standalone directive. Its kind and the presence of a
///               'nowait' clause select the runtime entry point.
/// \param IfCond Optional 'if' clause condition; when present the whole
///               sequence is emitted through emitIfClause with an empty
///               'else' branch.
/// \param Device Optional device expression, cast to a signed 64-bit value;
///               OMP_DEVICEID_UNDEF is passed when it is null.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo and MapTypesArray are filled in by TargetThenGen below and read
  // by ThenGen, which captures both by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    // Every other directive kind is listed explicitly and is not expected
    // here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  // Builds the offloading arrays, publishes them through InputInfo and
  // MapTypesArray, and then emits ThenGen.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    // With a 'depend' clause the call is emitted through
    // EmitOMPTargetTaskBasedDirective; otherwise it is emitted inline.
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Nothing is emitted for the 'else' branch when the 'if' clause is false.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10300 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  /// Note: LinearWithVarStride is the first enumerator, so a value-initialized
  /// ParamKindTy (e.g. `{}`) is LinearWithVarStride, not Vector.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; defaults to Vector.
    ParamKindTy Kind = Vector;
    /// Value printed after the 's' (LinearWithVarStride) or 'l' (Linear)
    /// marker when vector function names are mangled.
    llvm::APSInt StrideOrArg;
    /// Alignment; mangled as 'a<value>' only when it is non-zero.
    llvm::APSInt Alignment;
  };
} // namespace
10311 
10312 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10313                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10314   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10315   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10316   // of that clause. The VLEN value must be power of 2.
10317   // In other case the notion of the function`s "characteristic data type" (CDT)
10318   // is used to compute the vector length.
10319   // CDT is defined in the following order:
10320   //   a) For non-void function, the CDT is the return type.
10321   //   b) If the function has any non-uniform, non-linear parameters, then the
10322   //   CDT is the type of the first such parameter.
10323   //   c) If the CDT determined by a) or b) above is struct, union, or class
10324   //   type which is pass-by-value (except for the type that maps to the
10325   //   built-in complex data type), the characteristic data type is int.
10326   //   d) If none of the above three cases is applicable, the CDT is int.
10327   // The VLEN is then determined based on the CDT and the size of vector
10328   // register of that ISA for which current vector version is generated. The
10329   // VLEN is computed using the formula below:
10330   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10331   // where vector register size specified in section 3.2.1 Registers and the
10332   // Stack Frame of original AMD64 ABI document.
10333   QualType RetType = FD->getReturnType();
10334   if (RetType.isNull())
10335     return 0;
10336   ASTContext &C = FD->getASTContext();
10337   QualType CDT;
10338   if (!RetType.isNull() && !RetType->isVoidType()) {
10339     CDT = RetType;
10340   } else {
10341     unsigned Offset = 0;
10342     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10343       if (ParamAttrs[Offset].Kind == Vector)
10344         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10345       ++Offset;
10346     }
10347     if (CDT.isNull()) {
10348       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10349         if (ParamAttrs[I + Offset].Kind == Vector) {
10350           CDT = FD->getParamDecl(I)->getType();
10351           break;
10352         }
10353       }
10354     }
10355   }
10356   if (CDT.isNull())
10357     CDT = C.IntTy;
10358   CDT = CDT->getCanonicalTypeUnqualified();
10359   if (CDT->isRecordType() || CDT->isUnionType())
10360     CDT = C.IntTy;
10361   return C.getTypeSize(CDT);
10362 }
10363 
10364 static void
10365 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10366                            const llvm::APSInt &VLENVal,
10367                            ArrayRef<ParamAttrTy> ParamAttrs,
10368                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10369   struct ISADataTy {
10370     char ISA;
10371     unsigned VecRegSize;
10372   };
10373   ISADataTy ISAData[] = {
10374       {
10375           'b', 128
10376       }, // SSE
10377       {
10378           'c', 256
10379       }, // AVX
10380       {
10381           'd', 256
10382       }, // AVX2
10383       {
10384           'e', 512
10385       }, // AVX512
10386   };
10387   llvm::SmallVector<char, 2> Masked;
10388   switch (State) {
10389   case OMPDeclareSimdDeclAttr::BS_Undefined:
10390     Masked.push_back('N');
10391     Masked.push_back('M');
10392     break;
10393   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10394     Masked.push_back('N');
10395     break;
10396   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10397     Masked.push_back('M');
10398     break;
10399   }
10400   for (char Mask : Masked) {
10401     for (const ISADataTy &Data : ISAData) {
10402       SmallString<256> Buffer;
10403       llvm::raw_svector_ostream Out(Buffer);
10404       Out << "_ZGV" << Data.ISA << Mask;
10405       if (!VLENVal) {
10406         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10407         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10408         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10409       } else {
10410         Out << VLENVal;
10411       }
10412       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10413         switch (ParamAttr.Kind){
10414         case LinearWithVarStride:
10415           Out << 's' << ParamAttr.StrideOrArg;
10416           break;
10417         case Linear:
10418           Out << 'l';
10419           if (!!ParamAttr.StrideOrArg)
10420             Out << ParamAttr.StrideOrArg;
10421           break;
10422         case Uniform:
10423           Out << 'u';
10424           break;
10425         case Vector:
10426           Out << 'v';
10427           break;
10428         }
10429         if (!!ParamAttr.Alignment)
10430           Out << 'a' << ParamAttr.Alignment;
10431       }
10432       Out << '_' << Fn->getName();
10433       Fn->addFnAttr(Out.str());
10434     }
10435   }
10436 }
10437 
// These are the functions that are needed to mangle the name of the
10439 // vector functions generated by the compiler, according to the rules
10440 // defined in the "Vector Function ABI specifications for AArch64",
10441 // available at
10442 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10443 
10444 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10445 ///
10446 /// TODO: Need to implement the behavior for reference marked with a
10447 /// var or no linear modifiers (1.b in the section). For this, we
10448 /// need to extend ParamKindTy to support the linear modifiers.
10449 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10450   QT = QT.getCanonicalType();
10451 
10452   if (QT->isVoidType())
10453     return false;
10454 
10455   if (Kind == ParamKindTy::Uniform)
10456     return false;
10457 
10458   if (Kind == ParamKindTy::Linear)
10459     return false;
10460 
10461   // TODO: Handle linear references with modifiers
10462 
10463   if (Kind == ParamKindTy::LinearWithVarStride)
10464     return false;
10465 
10466   return true;
10467 }
10468 
10469 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10470 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10471   QT = QT.getCanonicalType();
10472   unsigned Size = C.getTypeSize(QT);
10473 
10474   // Only scalars and complex within 16 bytes wide set PVB to true.
10475   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10476     return false;
10477 
10478   if (QT->isFloatingType())
10479     return true;
10480 
10481   if (QT->isIntegerType())
10482     return true;
10483 
10484   if (QT->isPointerType())
10485     return true;
10486 
10487   // TODO: Add support for complex types (section 3.1.2, item 2).
10488 
10489   return false;
10490 }
10491 
10492 /// Computes the lane size (LS) of a return type or of an input parameter,
10493 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10494 /// TODO: Add support for references, section 3.2.1, item 1.
10495 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10496   if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10497     QualType PTy = QT.getCanonicalType()->getPointeeType();
10498     if (getAArch64PBV(PTy, C))
10499       return C.getTypeSize(PTy);
10500   }
10501   if (getAArch64PBV(QT, C))
10502     return C.getTypeSize(QT);
10503 
10504   return C.getTypeSize(C.getUIntPtrType());
10505 }
10506 
10507 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10508 // signature of the scalar function, as defined in 3.2.2 of the
10509 // AAVFABI.
10510 static std::tuple<unsigned, unsigned, bool>
10511 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10512   QualType RetType = FD->getReturnType().getCanonicalType();
10513 
10514   ASTContext &C = FD->getASTContext();
10515 
10516   bool OutputBecomesInput = false;
10517 
10518   llvm::SmallVector<unsigned, 8> Sizes;
10519   if (!RetType->isVoidType()) {
10520     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10521     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10522       OutputBecomesInput = true;
10523   }
10524   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10525     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10526     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10527   }
10528 
10529   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10530   // The LS of a function parameter / return value can only be a power
10531   // of 2, starting from 8 bits, up to 128.
10532   assert(std::all_of(Sizes.begin(), Sizes.end(),
10533                      [](unsigned Size) {
10534                        return Size == 8 || Size == 16 || Size == 32 ||
10535                               Size == 64 || Size == 128;
10536                      }) &&
10537          "Invalid size");
10538 
10539   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10540                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10541                          OutputBecomesInput);
10542 }
10543 
10544 /// Mangle the parameter part of the vector function name according to
10545 /// their OpenMP classification. The mangling function is defined in
10546 /// section 3.5 of the AAVFABI.
10547 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10548   SmallString<256> Buffer;
10549   llvm::raw_svector_ostream Out(Buffer);
10550   for (const auto &ParamAttr : ParamAttrs) {
10551     switch (ParamAttr.Kind) {
10552     case LinearWithVarStride:
10553       Out << "ls" << ParamAttr.StrideOrArg;
10554       break;
10555     case Linear:
10556       Out << 'l';
10557       // Don't print the step value if it is not present or if it is
10558       // equal to 1.
10559       if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
10560         Out << ParamAttr.StrideOrArg;
10561       break;
10562     case Uniform:
10563       Out << 'u';
10564       break;
10565     case Vector:
10566       Out << 'v';
10567       break;
10568     }
10569 
10570     if (!!ParamAttr.Alignment)
10571       Out << 'a' << ParamAttr.Alignment;
10572   }
10573 
10574   return Out.str();
10575 }
10576 
10577 // Function used to add the attribute. The parameter `VLEN` is
10578 // templated to allow the use of "x" when targeting scalable functions
10579 // for SVE.
10580 template <typename T>
10581 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10582                                  char ISA, StringRef ParSeq,
10583                                  StringRef MangledName, bool OutputBecomesInput,
10584                                  llvm::Function *Fn) {
10585   SmallString<256> Buffer;
10586   llvm::raw_svector_ostream Out(Buffer);
10587   Out << Prefix << ISA << LMask << VLEN;
10588   if (OutputBecomesInput)
10589     Out << "v";
10590   Out << ParSeq << "_" << MangledName;
10591   Fn->addFnAttr(Out.str());
10592 }
10593 
10594 // Helper function to generate the Advanced SIMD names depending on
10595 // the value of the NDS when simdlen is not present.
10596 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10597                                       StringRef Prefix, char ISA,
10598                                       StringRef ParSeq, StringRef MangledName,
10599                                       bool OutputBecomesInput,
10600                                       llvm::Function *Fn) {
10601   switch (NDS) {
10602   case 8:
10603     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10604                          OutputBecomesInput, Fn);
10605     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10606                          OutputBecomesInput, Fn);
10607     break;
10608   case 16:
10609     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10610                          OutputBecomesInput, Fn);
10611     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10612                          OutputBecomesInput, Fn);
10613     break;
10614   case 32:
10615     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10616                          OutputBecomesInput, Fn);
10617     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10618                          OutputBecomesInput, Fn);
10619     break;
10620   case 64:
10621   case 128:
10622     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10623                          OutputBecomesInput, Fn);
10624     break;
10625   default:
10626     llvm_unreachable("Scalar type is too wide.");
10627   }
10628 }
10629 
10630 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10631 static void emitAArch64DeclareSimdFunction(
10632     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10633     ArrayRef<ParamAttrTy> ParamAttrs,
10634     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10635     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10636 
10637   // Get basic data for building the vector signature.
10638   const auto Data = getNDSWDS(FD, ParamAttrs);
10639   const unsigned NDS = std::get<0>(Data);
10640   const unsigned WDS = std::get<1>(Data);
10641   const bool OutputBecomesInput = std::get<2>(Data);
10642 
10643   // Check the values provided via `simdlen` by the user.
10644   // 1. A `simdlen(1)` doesn't produce vector signatures,
10645   if (UserVLEN == 1) {
10646     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10647         DiagnosticsEngine::Warning,
10648         "The clause simdlen(1) has no effect when targeting aarch64.");
10649     CGM.getDiags().Report(SLoc, DiagID);
10650     return;
10651   }
10652 
10653   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10654   // Advanced SIMD output.
10655   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10656     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10657         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10658                                     "power of 2 when targeting Advanced SIMD.");
10659     CGM.getDiags().Report(SLoc, DiagID);
10660     return;
10661   }
10662 
10663   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10664   // limits.
10665   if (ISA == 's' && UserVLEN != 0) {
10666     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10667       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10668           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10669                                       "lanes in the architectural constraints "
10670                                       "for SVE (min is 128-bit, max is "
10671                                       "2048-bit, by steps of 128-bit)");
10672       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10673       return;
10674     }
10675   }
10676 
10677   // Sort out parameter sequence.
10678   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10679   StringRef Prefix = "_ZGV";
10680   // Generate simdlen from user input (if any).
10681   if (UserVLEN) {
10682     if (ISA == 's') {
10683       // SVE generates only a masked function.
10684       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10685                            OutputBecomesInput, Fn);
10686     } else {
10687       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10688       // Advanced SIMD generates one or two functions, depending on
10689       // the `[not]inbranch` clause.
10690       switch (State) {
10691       case OMPDeclareSimdDeclAttr::BS_Undefined:
10692         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10693                              OutputBecomesInput, Fn);
10694         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10695                              OutputBecomesInput, Fn);
10696         break;
10697       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10698         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10699                              OutputBecomesInput, Fn);
10700         break;
10701       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10702         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10703                              OutputBecomesInput, Fn);
10704         break;
10705       }
10706     }
10707   } else {
10708     // If no user simdlen is provided, follow the AAVFABI rules for
10709     // generating the vector length.
10710     if (ISA == 's') {
10711       // SVE, section 3.4.1, item 1.
10712       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10713                            OutputBecomesInput, Fn);
10714     } else {
10715       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10716       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10717       // two vector names depending on the use of the clause
10718       // `[not]inbranch`.
10719       switch (State) {
10720       case OMPDeclareSimdDeclAttr::BS_Undefined:
10721         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10722                                   OutputBecomesInput, Fn);
10723         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10724                                   OutputBecomesInput, Fn);
10725         break;
10726       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10727         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10728                                   OutputBecomesInput, Fn);
10729         break;
10730       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10731         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10732                                   OutputBecomesInput, Fn);
10733         break;
10734       }
10735     }
10736   }
10737 }
10738 
10739 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10740                                               llvm::Function *Fn) {
10741   ASTContext &C = CGM.getContext();
10742   FD = FD->getMostRecentDecl();
10743   // Map params to their positions in function decl.
10744   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10745   if (isa<CXXMethodDecl>(FD))
10746     ParamPositions.try_emplace(FD, 0);
10747   unsigned ParamPos = ParamPositions.size();
10748   for (const ParmVarDecl *P : FD->parameters()) {
10749     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10750     ++ParamPos;
10751   }
10752   while (FD) {
10753     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10754       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10755       // Mark uniform parameters.
10756       for (const Expr *E : Attr->uniforms()) {
10757         E = E->IgnoreParenImpCasts();
10758         unsigned Pos;
10759         if (isa<CXXThisExpr>(E)) {
10760           Pos = ParamPositions[FD];
10761         } else {
10762           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10763                                 ->getCanonicalDecl();
10764           Pos = ParamPositions[PVD];
10765         }
10766         ParamAttrs[Pos].Kind = Uniform;
10767       }
10768       // Get alignment info.
10769       auto NI = Attr->alignments_begin();
10770       for (const Expr *E : Attr->aligneds()) {
10771         E = E->IgnoreParenImpCasts();
10772         unsigned Pos;
10773         QualType ParmTy;
10774         if (isa<CXXThisExpr>(E)) {
10775           Pos = ParamPositions[FD];
10776           ParmTy = E->getType();
10777         } else {
10778           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10779                                 ->getCanonicalDecl();
10780           Pos = ParamPositions[PVD];
10781           ParmTy = PVD->getType();
10782         }
10783         ParamAttrs[Pos].Alignment =
10784             (*NI)
10785                 ? (*NI)->EvaluateKnownConstInt(C)
10786                 : llvm::APSInt::getUnsigned(
10787                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10788                           .getQuantity());
10789         ++NI;
10790       }
10791       // Mark linear parameters.
10792       auto SI = Attr->steps_begin();
10793       auto MI = Attr->modifiers_begin();
10794       for (const Expr *E : Attr->linears()) {
10795         E = E->IgnoreParenImpCasts();
10796         unsigned Pos;
10797         if (isa<CXXThisExpr>(E)) {
10798           Pos = ParamPositions[FD];
10799         } else {
10800           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10801                                 ->getCanonicalDecl();
10802           Pos = ParamPositions[PVD];
10803         }
10804         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10805         ParamAttr.Kind = Linear;
10806         if (*SI) {
10807           Expr::EvalResult Result;
10808           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10809             if (const auto *DRE =
10810                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10811               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10812                 ParamAttr.Kind = LinearWithVarStride;
10813                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10814                     ParamPositions[StridePVD->getCanonicalDecl()]);
10815               }
10816             }
10817           } else {
10818             ParamAttr.StrideOrArg = Result.Val.getInt();
10819           }
10820         }
10821         ++SI;
10822         ++MI;
10823       }
10824       llvm::APSInt VLENVal;
10825       SourceLocation ExprLoc;
10826       const Expr *VLENExpr = Attr->getSimdlen();
10827       if (VLENExpr) {
10828         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10829         ExprLoc = VLENExpr->getExprLoc();
10830       }
10831       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10832       if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
10833           CGM.getTriple().getArch() == llvm::Triple::x86_64) {
10834         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10835       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10836         unsigned VLEN = VLENVal.getExtValue();
10837         StringRef MangledName = Fn->getName();
10838         if (CGM.getTarget().hasFeature("sve"))
10839           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10840                                          MangledName, 's', 128, Fn, ExprLoc);
10841         if (CGM.getTarget().hasFeature("neon"))
10842           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10843                                          MangledName, 'n', 128, Fn, ExprLoc);
10844       }
10845     }
10846     FD = FD->getPreviousDecl();
10847   }
10848 }
10849 
10850 namespace {
10851 /// Cleanup action for doacross support.
10852 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10853 public:
10854   static const int DoacrossFinArgs = 2;
10855 
10856 private:
10857   llvm::FunctionCallee RTLFn;
10858   llvm::Value *Args[DoacrossFinArgs];
10859 
10860 public:
10861   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10862                     ArrayRef<llvm::Value *> CallArgs)
10863       : RTLFn(RTLFn) {
10864     assert(CallArgs.size() == DoacrossFinArgs);
10865     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10866   }
10867   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10868     if (!CGF.HaveInsertPoint())
10869       return;
10870     CGF.EmitRuntimeCall(RTLFn, Args);
10871   }
10872 };
10873 } // namespace
10874 
10875 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
10876                                        const OMPLoopDirective &D,
10877                                        ArrayRef<Expr *> NumIterations) {
10878   if (!CGF.HaveInsertPoint())
10879     return;
10880 
10881   ASTContext &C = CGM.getContext();
10882   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
10883   RecordDecl *RD;
10884   if (KmpDimTy.isNull()) {
10885     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
10886     //  kmp_int64 lo; // lower
10887     //  kmp_int64 up; // upper
10888     //  kmp_int64 st; // stride
10889     // };
10890     RD = C.buildImplicitRecord("kmp_dim");
10891     RD->startDefinition();
10892     addFieldToRecordDecl(C, RD, Int64Ty);
10893     addFieldToRecordDecl(C, RD, Int64Ty);
10894     addFieldToRecordDecl(C, RD, Int64Ty);
10895     RD->completeDefinition();
10896     KmpDimTy = C.getRecordType(RD);
10897   } else {
10898     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
10899   }
10900   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
10901   QualType ArrayTy =
10902       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
10903 
10904   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
10905   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
10906   enum { LowerFD = 0, UpperFD, StrideFD };
10907   // Fill dims with data.
10908   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
10909     LValue DimsLVal = CGF.MakeAddrLValue(
10910         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
10911     // dims.upper = num_iterations;
10912     LValue UpperLVal = CGF.EmitLValueForField(
10913         DimsLVal, *std::next(RD->field_begin(), UpperFD));
10914     llvm::Value *NumIterVal =
10915         CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
10916                                  D.getNumIterations()->getType(), Int64Ty,
10917                                  D.getNumIterations()->getExprLoc());
10918     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
10919     // dims.stride = 1;
10920     LValue StrideLVal = CGF.EmitLValueForField(
10921         DimsLVal, *std::next(RD->field_begin(), StrideFD));
10922     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
10923                           StrideLVal);
10924   }
10925 
10926   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
10927   // kmp_int32 num_dims, struct kmp_dim * dims);
10928   llvm::Value *Args[] = {
10929       emitUpdateLocation(CGF, D.getBeginLoc()),
10930       getThreadID(CGF, D.getBeginLoc()),
10931       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
10932       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
10933           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
10934           CGM.VoidPtrTy)};
10935 
10936   llvm::FunctionCallee RTLFn =
10937       createRuntimeFunction(OMPRTL__kmpc_doacross_init);
10938   CGF.EmitRuntimeCall(RTLFn, Args);
10939   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
10940       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
10941   llvm::FunctionCallee FiniRTLFn =
10942       createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
10943   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
10944                                              llvm::makeArrayRef(FiniArgs));
10945 }
10946 
10947 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
10948                                           const OMPDependClause *C) {
10949   QualType Int64Ty =
10950       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
10951   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10952   QualType ArrayTy = CGM.getContext().getConstantArrayType(
10953       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
10954   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10955   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10956     const Expr *CounterVal = C->getLoopData(I);
10957     assert(CounterVal);
10958     llvm::Value *CntVal = CGF.EmitScalarConversion(
10959         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10960         CounterVal->getExprLoc());
10961     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
10962                           /*Volatile=*/false, Int64Ty);
10963   }
10964   llvm::Value *Args[] = {
10965       emitUpdateLocation(CGF, C->getBeginLoc()),
10966       getThreadID(CGF, C->getBeginLoc()),
10967       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
10968   llvm::FunctionCallee RTLFn;
10969   if (C->getDependencyKind() == OMPC_DEPEND_source) {
10970     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
10971   } else {
10972     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
10973     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
10974   }
10975   CGF.EmitRuntimeCall(RTLFn, Args);
10976 }
10977 
10978 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
10979                                llvm::FunctionCallee Callee,
10980                                ArrayRef<llvm::Value *> Args) const {
10981   assert(Loc.isValid() && "Outlined function call location must be valid.");
10982   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
10983 
10984   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
10985     if (Fn->doesNotThrow()) {
10986       CGF.EmitNounwindRuntimeCall(Fn, Args);
10987       return;
10988     }
10989   }
10990   CGF.EmitRuntimeCall(Callee, Args);
10991 }
10992 
/// Emit a call to the outlined function \p OutlinedFn with \p Args at
/// location \p Loc. This implementation simply forwards to emitCall.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
10998 
10999 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11000   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11001     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11002       HasEmittedDeclareTargetRegion = true;
11003 }
11004 
/// Return the address of the parameter \p NativeParam, obtained from \p CGF
/// as the address of a local variable. \p TargetParam is not used by this
/// implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11010 
11011 namespace {
11012 /// Cleanup action for allocate support.
11013 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11014 public:
11015   static const int CleanupArgs = 3;
11016 
11017 private:
11018   llvm::FunctionCallee RTLFn;
11019   llvm::Value *Args[CleanupArgs];
11020 
11021 public:
11022   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11023                        ArrayRef<llvm::Value *> CallArgs)
11024       : RTLFn(RTLFn) {
11025     assert(CallArgs.size() == CleanupArgs &&
11026            "Size of arguments does not match.");
11027     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11028   }
11029   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11030     if (!CGF.HaveInsertPoint())
11031       return;
11032     CGF.EmitRuntimeCall(RTLFn, Args);
11033   }
11034 };
11035 } // namespace
11036 
11037 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11038                                                    const VarDecl *VD) {
11039   if (!VD)
11040     return Address::invalid();
11041   const VarDecl *CVD = VD->getCanonicalDecl();
11042   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
11043     return Address::invalid();
11044   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11045   // Use the default allocation.
11046   if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
11047       !AA->getAllocator())
11048     return Address::invalid();
11049   llvm::Value *Size;
11050   CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11051   if (CVD->getType()->isVariablyModifiedType()) {
11052     Size = CGF.getTypeSize(CVD->getType());
11053     // Align the size: ((size + align - 1) / align) * align
11054     Size = CGF.Builder.CreateNUWAdd(
11055         Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11056     Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11057     Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11058   } else {
11059     CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11060     Size = CGM.getSize(Sz.alignTo(Align));
11061   }
11062   llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11063   assert(AA->getAllocator() &&
11064          "Expected allocator expression for non-default allocator.");
11065   llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
11066   // According to the standard, the original allocator type is a enum (integer).
11067   // Convert to pointer type, if required.
11068   if (Allocator->getType()->isIntegerTy())
11069     Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
11070   else if (Allocator->getType()->isPointerTy())
11071     Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
11072                                                                 CGM.VoidPtrTy);
11073   llvm::Value *Args[] = {ThreadID, Size, Allocator};
11074 
11075   llvm::Value *Addr =
11076       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
11077                           CVD->getName() + ".void.addr");
11078   llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
11079                                                               Allocator};
11080   llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
11081 
11082   CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11083                                                 llvm::makeArrayRef(FiniArgs));
11084   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11085       Addr,
11086       CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
11087       CVD->getName() + ".addr");
11088   return Address(Addr, Align);
11089 }
11090 
namespace {
/// Data of a single context selector: its names are held as an array of
/// strings and its score as an llvm::APSInt.
using OMPContextSelectorData =
    OpenMPCtxSelectorData<ArrayRef<StringRef>, llvm::APSInt>;
/// List of context selector data entries; the functions below build one such
/// list per declare variant attribute.
using CompleteOMPContextSelectorData = SmallVector<OMPContextSelectorData, 4>;
} // anonymous namespace
11096 
/// Checks current context and returns true if it matches the context selector.
///
/// This primary template is the fallback for selector set/selector
/// combinations without an explicit specialization: it only asserts that the
/// data carries a known selector set and selector, and reports no match.
template <OpenMPContextSelectorSetKind CtxSet, OpenMPContextSelectorKind Ctx,
          typename... Arguments>
static bool checkContext(const OMPContextSelectorData &Data,
                         Arguments... Params) {
  assert(Data.CtxSet != OMP_CTX_SET_unknown && Data.Ctx != OMP_CTX_unknown &&
         "Unknown context selector or context selector set.");
  return false;
}
11106 
11107 /// Checks for implementation={vendor(<vendor>)} context selector.
11108 /// \returns true iff <vendor>="llvm", false otherwise.
11109 template <>
11110 bool checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(
11111     const OMPContextSelectorData &Data) {
11112   return llvm::all_of(Data.Names,
11113                       [](StringRef S) { return !S.compare_lower("llvm"); });
11114 }
11115 
11116 /// Checks for device={kind(<kind>)} context selector.
11117 /// \returns true if <kind>="host" and compilation is for host.
11118 /// true if <kind>="nohost" and compilation is for device.
11119 /// true if <kind>="cpu" and compilation is for Arm, X86 or PPC CPU.
11120 /// true if <kind>="gpu" and compilation is for NVPTX or AMDGCN.
11121 /// false otherwise.
11122 template <>
11123 bool checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(
11124     const OMPContextSelectorData &Data, CodeGenModule &CGM) {
11125   for (StringRef Name : Data.Names) {
11126     if (!Name.compare_lower("host")) {
11127       if (CGM.getLangOpts().OpenMPIsDevice)
11128         return false;
11129       continue;
11130     }
11131     if (!Name.compare_lower("nohost")) {
11132       if (!CGM.getLangOpts().OpenMPIsDevice)
11133         return false;
11134       continue;
11135     }
11136     switch (CGM.getTriple().getArch()) {
11137     case llvm::Triple::arm:
11138     case llvm::Triple::armeb:
11139     case llvm::Triple::aarch64:
11140     case llvm::Triple::aarch64_be:
11141     case llvm::Triple::aarch64_32:
11142     case llvm::Triple::ppc:
11143     case llvm::Triple::ppc64:
11144     case llvm::Triple::ppc64le:
11145     case llvm::Triple::x86:
11146     case llvm::Triple::x86_64:
11147       if (Name.compare_lower("cpu"))
11148         return false;
11149       break;
11150     case llvm::Triple::amdgcn:
11151     case llvm::Triple::nvptx:
11152     case llvm::Triple::nvptx64:
11153       if (Name.compare_lower("gpu"))
11154         return false;
11155       break;
11156     case llvm::Triple::UnknownArch:
11157     case llvm::Triple::arc:
11158     case llvm::Triple::avr:
11159     case llvm::Triple::bpfel:
11160     case llvm::Triple::bpfeb:
11161     case llvm::Triple::hexagon:
11162     case llvm::Triple::mips:
11163     case llvm::Triple::mipsel:
11164     case llvm::Triple::mips64:
11165     case llvm::Triple::mips64el:
11166     case llvm::Triple::msp430:
11167     case llvm::Triple::r600:
11168     case llvm::Triple::riscv32:
11169     case llvm::Triple::riscv64:
11170     case llvm::Triple::sparc:
11171     case llvm::Triple::sparcv9:
11172     case llvm::Triple::sparcel:
11173     case llvm::Triple::systemz:
11174     case llvm::Triple::tce:
11175     case llvm::Triple::tcele:
11176     case llvm::Triple::thumb:
11177     case llvm::Triple::thumbeb:
11178     case llvm::Triple::xcore:
11179     case llvm::Triple::le32:
11180     case llvm::Triple::le64:
11181     case llvm::Triple::amdil:
11182     case llvm::Triple::amdil64:
11183     case llvm::Triple::hsail:
11184     case llvm::Triple::hsail64:
11185     case llvm::Triple::spir:
11186     case llvm::Triple::spir64:
11187     case llvm::Triple::kalimba:
11188     case llvm::Triple::shave:
11189     case llvm::Triple::lanai:
11190     case llvm::Triple::wasm32:
11191     case llvm::Triple::wasm64:
11192     case llvm::Triple::renderscript32:
11193     case llvm::Triple::renderscript64:
11194       return false;
11195     }
11196   }
11197   return true;
11198 }
11199 
11200 bool matchesContext(CodeGenModule &CGM,
11201                     const CompleteOMPContextSelectorData &ContextData) {
11202   for (const OMPContextSelectorData &Data : ContextData) {
11203     switch (Data.Ctx) {
11204     case OMP_CTX_vendor:
11205       assert(Data.CtxSet == OMP_CTX_SET_implementation &&
11206              "Expected implementation context selector set.");
11207       if (!checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(Data))
11208         return false;
11209       break;
11210     case OMP_CTX_kind:
11211       assert(Data.CtxSet == OMP_CTX_SET_device &&
11212              "Expected device context selector set.");
11213       if (!checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(Data,
11214                                                                            CGM))
11215         return false;
11216       break;
11217     case OMP_CTX_unknown:
11218       llvm_unreachable("Unknown context selector kind.");
11219     }
11220   }
11221   return true;
11222 }
11223 
11224 static CompleteOMPContextSelectorData
11225 translateAttrToContextSelectorData(ASTContext &C,
11226                                    const OMPDeclareVariantAttr *A) {
11227   CompleteOMPContextSelectorData Data;
11228   for (unsigned I = 0, E = A->scores_size(); I < E; ++I) {
11229     Data.emplace_back();
11230     auto CtxSet = static_cast<OpenMPContextSelectorSetKind>(
11231         *std::next(A->ctxSelectorSets_begin(), I));
11232     auto Ctx = static_cast<OpenMPContextSelectorKind>(
11233         *std::next(A->ctxSelectors_begin(), I));
11234     Data.back().CtxSet = CtxSet;
11235     Data.back().Ctx = Ctx;
11236     const Expr *Score = *std::next(A->scores_begin(), I);
11237     Data.back().Score = Score->EvaluateKnownConstInt(C);
11238     switch (Ctx) {
11239     case OMP_CTX_vendor:
11240       assert(CtxSet == OMP_CTX_SET_implementation &&
11241              "Expected implementation context selector set.");
11242       Data.back().Names =
11243           llvm::makeArrayRef(A->implVendors_begin(), A->implVendors_end());
11244       break;
11245     case OMP_CTX_kind:
11246       assert(CtxSet == OMP_CTX_SET_device &&
11247              "Expected device context selector set.");
11248       Data.back().Names =
11249           llvm::makeArrayRef(A->deviceKinds_begin(), A->deviceKinds_end());
11250       break;
11251     case OMP_CTX_unknown:
11252       llvm_unreachable("Unknown context selector kind.");
11253     }
11254   }
11255   return Data;
11256 }
11257 
11258 static bool isStrictSubset(const CompleteOMPContextSelectorData &LHS,
11259                            const CompleteOMPContextSelectorData &RHS) {
11260   llvm::SmallDenseMap<std::pair<int, int>, llvm::StringSet<>, 4> RHSData;
11261   for (const OMPContextSelectorData &D : RHS) {
11262     auto &Pair = RHSData.FindAndConstruct(std::make_pair(D.CtxSet, D.Ctx));
11263     Pair.getSecond().insert(D.Names.begin(), D.Names.end());
11264   }
11265   bool AllSetsAreEqual = true;
11266   for (const OMPContextSelectorData &D : LHS) {
11267     auto It = RHSData.find(std::make_pair(D.CtxSet, D.Ctx));
11268     if (It == RHSData.end())
11269       return false;
11270     if (D.Names.size() > It->getSecond().size())
11271       return false;
11272     if (llvm::set_union(It->getSecond(), D.Names))
11273       return false;
11274     AllSetsAreEqual =
11275         AllSetsAreEqual && (D.Names.size() == It->getSecond().size());
11276   }
11277 
11278   return LHS.size() != RHS.size() || !AllSetsAreEqual;
11279 }
11280 
11281 static bool greaterCtxScore(const CompleteOMPContextSelectorData &LHS,
11282                             const CompleteOMPContextSelectorData &RHS) {
11283   // Score is calculated as sum of all scores + 1.
11284   llvm::APSInt LHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false);
11285   bool RHSIsSubsetOfLHS = isStrictSubset(RHS, LHS);
11286   if (RHSIsSubsetOfLHS) {
11287     LHSScore = llvm::APSInt::get(0);
11288   } else {
11289     for (const OMPContextSelectorData &Data : LHS) {
11290       if (Data.Score.getBitWidth() > LHSScore.getBitWidth()) {
11291         LHSScore = LHSScore.extend(Data.Score.getBitWidth()) + Data.Score;
11292       } else if (Data.Score.getBitWidth() < LHSScore.getBitWidth()) {
11293         LHSScore += Data.Score.extend(LHSScore.getBitWidth());
11294       } else {
11295         LHSScore += Data.Score;
11296       }
11297     }
11298   }
11299   llvm::APSInt RHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false);
11300   if (!RHSIsSubsetOfLHS && isStrictSubset(LHS, RHS)) {
11301     RHSScore = llvm::APSInt::get(0);
11302   } else {
11303     for (const OMPContextSelectorData &Data : RHS) {
11304       if (Data.Score.getBitWidth() > RHSScore.getBitWidth()) {
11305         RHSScore = RHSScore.extend(Data.Score.getBitWidth()) + Data.Score;
11306       } else if (Data.Score.getBitWidth() < RHSScore.getBitWidth()) {
11307         RHSScore += Data.Score.extend(RHSScore.getBitWidth());
11308       } else {
11309         RHSScore += Data.Score;
11310       }
11311     }
11312   }
11313   return llvm::APSInt::compareValues(LHSScore, RHSScore) >= 0;
11314 }
11315 
11316 /// Finds the variant function that matches current context with its context
11317 /// selector.
11318 static const FunctionDecl *getDeclareVariantFunction(CodeGenModule &CGM,
11319                                                      const FunctionDecl *FD) {
11320   if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>())
11321     return FD;
11322   // Iterate through all DeclareVariant attributes and check context selectors.
11323   const OMPDeclareVariantAttr *TopMostAttr = nullptr;
11324   CompleteOMPContextSelectorData TopMostData;
11325   for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) {
11326     CompleteOMPContextSelectorData Data =
11327         translateAttrToContextSelectorData(CGM.getContext(), A);
11328     if (!matchesContext(CGM, Data))
11329       continue;
11330     // If the attribute matches the context, find the attribute with the highest
11331     // score.
11332     if (!TopMostAttr || !greaterCtxScore(TopMostData, Data)) {
11333       TopMostAttr = A;
11334       TopMostData.swap(Data);
11335     }
11336   }
11337   if (!TopMostAttr)
11338     return FD;
11339   return cast<FunctionDecl>(
11340       cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts())
11341           ->getDecl());
11342 }
11343 
11344 bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
11345   const auto *D = cast<FunctionDecl>(GD.getDecl());
11346   // If the original function is defined already, use its definition.
11347   StringRef MangledName = CGM.getMangledName(GD);
11348   llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName);
11349   if (Orig && !Orig->isDeclaration())
11350     return false;
11351   const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D);
11352   // Emit original function if it does not have declare variant attribute or the
11353   // context does not match.
11354   if (NewFD == D)
11355     return false;
11356   GlobalDecl NewGD = GD.getWithDecl(NewFD);
11357   if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) {
11358     DeferredVariantFunction.erase(D);
11359     return true;
11360   }
11361   DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD)));
11362   return true;
11363 }
11364 
11365 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11366     CodeGenModule &CGM, const OMPLoopDirective &S)
11367     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11368   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11369   if (!NeedToPush)
11370     return;
11371   NontemporalDeclsSet &DS =
11372       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11373   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11374     for (const Stmt *Ref : C->private_refs()) {
11375       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11376       const ValueDecl *VD;
11377       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11378         VD = DRE->getDecl();
11379       } else {
11380         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11381         assert((ME->isImplicitCXXThis() ||
11382                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11383                "Expected member of current class.");
11384         VD = ME->getMemberDecl();
11385       }
11386       DS.insert(VD);
11387     }
11388   }
11389 }
11390 
11391 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11392   if (!NeedToPush)
11393     return;
11394   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11395 }
11396 
11397 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11398   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11399 
11400   return llvm::any_of(
11401       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11402       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11403 }
11404 
11405 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11406     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11407     : CGM(CGF.CGM),
11408       NeedToPush(llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11409                               [](const OMPLastprivateClause *C) {
11410                                 return C->getKind() ==
11411                                        OMPC_LASTPRIVATE_conditional;
11412                               })) {
11413   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11414   if (!NeedToPush)
11415     return;
11416   LastprivateConditionalData &Data =
11417       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11418   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11419     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11420       continue;
11421 
11422     for (const Expr *Ref : C->varlists()) {
11423       Data.DeclToUniqeName.try_emplace(
11424           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11425           generateUniqueName(CGM, "pl_cond", Ref));
11426     }
11427   }
11428   Data.IVLVal = IVLVal;
11429   // In simd only mode or for simd directives no need to generate threadprivate
11430   // references for the loop iteration counter, we can use the original one
11431   // since outlining cannot happen in simd regions.
11432   if (CGF.getLangOpts().OpenMPSimd ||
11433       isOpenMPSimdDirective(S.getDirectiveKind())) {
11434     Data.UseOriginalIV = true;
11435     return;
11436   }
11437   llvm::SmallString<16> Buffer;
11438   llvm::raw_svector_ostream OS(Buffer);
11439   PresumedLoc PLoc =
11440       CGM.getContext().getSourceManager().getPresumedLoc(S.getBeginLoc());
11441   assert(PLoc.isValid() && "Source location is expected to be always valid.");
11442 
11443   llvm::sys::fs::UniqueID ID;
11444   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
11445     CGM.getDiags().Report(diag::err_cannot_open_file)
11446         << PLoc.getFilename() << EC.message();
11447   OS << "$pl_cond_" << ID.getDevice() << "_" << ID.getFile() << "_"
11448      << PLoc.getLine() << "_" << PLoc.getColumn() << "$iv";
11449   Data.IVName = OS.str();
11450 }
11451 
11452 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11453   if (!NeedToPush)
11454     return;
11455   CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11456 }
11457 
11458 void CGOpenMPRuntime::initLastprivateConditionalCounter(
11459     CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11460   if (CGM.getLangOpts().OpenMPSimd ||
11461       !llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11462                     [](const OMPLastprivateClause *C) {
11463                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
11464                     }))
11465     return;
11466   const CGOpenMPRuntime::LastprivateConditionalData &Data =
11467       LastprivateConditionalStack.back();
11468   if (Data.UseOriginalIV)
11469     return;
11470   // Global loop counter. Required to handle inner parallel-for regions.
11471   // global_iv = iv;
11472   Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
11473       CGF, Data.IVLVal.getType(), Data.IVName);
11474   LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, Data.IVLVal.getType());
11475   llvm::Value *IVVal = CGF.EmitLoadOfScalar(Data.IVLVal, S.getBeginLoc());
11476   CGF.EmitStoreOfScalar(IVVal, GlobIVLVal);
11477 }
11478 
11479 namespace {
11480 /// Checks if the lastprivate conditional variable is referenced in LHS.
11481 class LastprivateConditionalRefChecker final
11482     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11483   CodeGenFunction &CGF;
11484   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11485   const Expr *FoundE = nullptr;
11486   const Decl *FoundD = nullptr;
11487   StringRef UniqueDeclName;
11488   LValue IVLVal;
11489   StringRef IVName;
11490   SourceLocation Loc;
11491   bool UseOriginalIV = false;
11492 
11493 public:
11494   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11495     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11496          llvm::reverse(LPM)) {
11497       auto It = D.DeclToUniqeName.find(E->getDecl());
11498       if (It == D.DeclToUniqeName.end())
11499         continue;
11500       FoundE = E;
11501       FoundD = E->getDecl()->getCanonicalDecl();
11502       UniqueDeclName = It->getSecond();
11503       IVLVal = D.IVLVal;
11504       IVName = D.IVName;
11505       UseOriginalIV = D.UseOriginalIV;
11506       break;
11507     }
11508     return FoundE == E;
11509   }
11510   bool VisitMemberExpr(const MemberExpr *E) {
11511     if (!CGF.IsWrappedCXXThis(E->getBase()))
11512       return false;
11513     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11514          llvm::reverse(LPM)) {
11515       auto It = D.DeclToUniqeName.find(E->getMemberDecl());
11516       if (It == D.DeclToUniqeName.end())
11517         continue;
11518       FoundE = E;
11519       FoundD = E->getMemberDecl()->getCanonicalDecl();
11520       UniqueDeclName = It->getSecond();
11521       IVLVal = D.IVLVal;
11522       IVName = D.IVName;
11523       UseOriginalIV = D.UseOriginalIV;
11524       break;
11525     }
11526     return FoundE == E;
11527   }
11528   bool VisitStmt(const Stmt *S) {
11529     for (const Stmt *Child : S->children()) {
11530       if (!Child)
11531         continue;
11532       if (const auto *E = dyn_cast<Expr>(Child))
11533         if (!E->isGLValue())
11534           continue;
11535       if (Visit(Child))
11536         return true;
11537     }
11538     return false;
11539   }
11540   explicit LastprivateConditionalRefChecker(
11541       CodeGenFunction &CGF,
11542       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11543       : CGF(CGF), LPM(LPM) {}
11544   std::tuple<const Expr *, const Decl *, StringRef, LValue, StringRef, bool>
11545   getFoundData() const {
11546     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, IVName,
11547                            UseOriginalIV);
11548   }
11549 };
11550 } // namespace
11551 
/// Emits the update code for a lastprivate conditional variable referenced by
/// \p LHS.
///
/// Does nothing when the OpenMP version is below 50 or when
/// LastprivateConditionalRefChecker finds no registered variable in \p LHS.
/// Otherwise it creates (or reuses) two internal variables, one for the last
/// value of the variable and one for the iteration at which it was stored,
/// and emits a guarded update of both. The update is wrapped in a critical
/// region unless SIMD-only mode is active.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  LastprivateConditionalRefChecker Checker(CGF, LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  // Unpack the data recorded by the checker. FoundD is unpacked as well but is
  // not used in the rest of this function.
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  StringRef IVName;
  bool UseOriginalIV;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) =
      Checker.getFoundData();

  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, UniqueDeclName + "$iv");
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      LVal.getAddress(CGF).getElementType(), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // global_iv
  // When UseOriginalIV is set, IVLVal is left as recorded by the checker.
  if (!UseOriginalIV) {
    Address IVAddr =
        getAddrOfArtificialThreadPrivate(CGF, IVLVal.getType(), IVName);
    IVLVal = CGF.MakeAddrLValue(IVAddr, IVLVal.getType());
  }
  // The counter is loaded here, before the region below is emitted.
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc());

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  // The lambda captures locals of this function by reference; it is invoked
  // below, within this function.
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    FoundE](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal =
        CGF.EmitLoadOfScalar(LastIVLVal, FoundE->getExprLoc());
    // (last_iv <= global_iv) ? Check if the variable is updated and store new
    // value in global var.
    // The comparison is signed or unsigned depending on the type of the
    // iteration variable.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = global_iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    // Only scalar and complex values can be copied here.
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, FoundE->getExprLoc());
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal =
          CGF.EmitLoadOfComplex(LVal, FoundE->getExprLoc());
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    // NOTE(review): the result of CreateEmpty() is discarded right away and
    // the branch has already been emitted above - confirm this statement has
    // the intended effect.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    // The critical region is named after the variable's unique name.
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, FoundE->getExprLoc());
  }
}
11659 
11660 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11661     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11662     SourceLocation Loc) {
11663   if (CGF.getLangOpts().OpenMP < 50)
11664     return;
11665   auto It = LastprivateConditionalStack.back().DeclToUniqeName.find(VD);
11666   assert(It != LastprivateConditionalStack.back().DeclToUniqeName.end() &&
11667          "Unknown lastprivate conditional variable.");
11668   StringRef UniqueName = It->getSecond();
11669   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11670   // The variable was not updated in the region - exit.
11671   if (!GV)
11672     return;
11673   LValue LPLVal = CGF.MakeAddrLValue(
11674       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11675   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11676   CGF.EmitStoreOfScalar(Res, PrivLVal);
11677 }
11678 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11684 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11690 
/// Not supported in SIMD-only mode: all arguments are ignored (NumberOfParts
/// is left untouched) and the body is a single llvm_unreachable, so this
/// function must never be called.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11698 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11706 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11713 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11719 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11724 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11730 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11738 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11745 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11753 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11760 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11766 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11772 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11779 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11785 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11793 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11799 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11805 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11812 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11818 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11823 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11829 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11838 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11845 
/// Forwards to CGOpenMPRuntime::emitReduction with unchanged arguments.
/// Asserts that Options.SimpleReduction is set before forwarding.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
11854 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11860 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11867 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11874 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11879 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11885 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11891 
/// Not supported in SIMD-only mode: all arguments are ignored (the OutlinedFn
/// and OutlinedFnID references are left untouched) and the body is a single
/// llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11898 
/// Not supported in SIMD-only mode: all arguments are ignored (SizeEmitter is
/// never invoked) and the body is a single llvm_unreachable, so this function
/// must never be called.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    const Expr *Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11908 
/// Not supported in SIMD-only mode: \p GD is ignored and the body is a single
/// llvm_unreachable, so this function must never be called.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11912 
/// Not supported in SIMD-only mode: \p GD is ignored and the body is a single
/// llvm_unreachable, so this function must never be called.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11916 
/// Always returns false; \p GD is not inspected. Unlike the neighbouring
/// CGOpenMPSIMDRuntime members, this one does not hit llvm_unreachable.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
11920 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11928 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11935 
/// Not supported in SIMD-only mode: all arguments are ignored (\p Info is
/// left untouched) and the body is a single llvm_unreachable, so this
/// function must never be called.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11941 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11947 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11953 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11958 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11964 
/// Not supported in SIMD-only mode: all arguments are ignored and the body is
/// a single llvm_unreachable, so this function must never be called.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11971