1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCXXABI.h"
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/CodeGen/ConstantInitBuilder.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/Basic/BitmaskEnum.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/Format.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 
31 using namespace clang;
32 using namespace CodeGen;
33 
34 namespace {
35 /// Base class for handling code generation inside OpenMP regions.
36 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
37 public:
38   /// Kinds of OpenMP regions used in codegen.
39   enum CGOpenMPRegionKind {
40     /// Region with outlined function for standalone 'parallel'
41     /// directive.
42     ParallelOutlinedRegion,
43     /// Region with outlined function for standalone 'task' directive.
44     TaskOutlinedRegion,
45     /// Region for constructs that do not require function outlining,
46     /// like 'for', 'sections', 'atomic' etc. directives.
47     InlinedRegion,
48     /// Region with outlined function for standalone 'target' directive.
49     TargetRegion,
50   };
51 
52   CGOpenMPRegionInfo(const CapturedStmt &CS,
53                      const CGOpenMPRegionKind RegionKind,
54                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
55                      bool HasCancel)
56       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
57         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
58 
59   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
60                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
61                      bool HasCancel)
62       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
63         Kind(Kind), HasCancel(HasCancel) {}
64 
65   /// Get a variable or parameter for storing global thread id
66   /// inside OpenMP construct.
67   virtual const VarDecl *getThreadIDVariable() const = 0;
68 
69   /// Emit the captured statement body.
70   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
71 
72   /// Get an LValue for the current ThreadID variable.
73   /// \return LValue for thread id variable. This LValue always has type int32*.
74   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
75 
76   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
77 
78   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
79 
80   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
81 
82   bool hasCancel() const { return HasCancel; }
83 
84   static bool classof(const CGCapturedStmtInfo *Info) {
85     return Info->getKind() == CR_OpenMP;
86   }
87 
88   ~CGOpenMPRegionInfo() override = default;
89 
90 protected:
91   CGOpenMPRegionKind RegionKind;
92   RegionCodeGenTy CodeGen;
93   OpenMPDirectiveKind Kind;
94   bool HasCancel;
95 };
96 
97 /// API for captured statement code generation in OpenMP constructs.
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
99 public:
100   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
101                              const RegionCodeGenTy &CodeGen,
102                              OpenMPDirectiveKind Kind, bool HasCancel,
103                              StringRef HelperName)
104       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
105                            HasCancel),
106         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
107     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
108   }
109 
110   /// Get a variable or parameter for storing global thread id
111   /// inside OpenMP construct.
112   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
113 
114   /// Get the name of the capture helper.
115   StringRef getHelperName() const override { return HelperName; }
116 
117   static bool classof(const CGCapturedStmtInfo *Info) {
118     return CGOpenMPRegionInfo::classof(Info) &&
119            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
120                ParallelOutlinedRegion;
121   }
122 
123 private:
124   /// A variable or parameter storing global thread id for OpenMP
125   /// constructs.
126   const VarDecl *ThreadIDVar;
127   StringRef HelperName;
128 };
129 
130 /// API for captured statement code generation in OpenMP constructs.
131 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
132 public:
133   class UntiedTaskActionTy final : public PrePostActionTy {
134     bool Untied;
135     const VarDecl *PartIDVar;
136     const RegionCodeGenTy UntiedCodeGen;
137     llvm::SwitchInst *UntiedSwitch = nullptr;
138 
139   public:
140     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
141                        const RegionCodeGenTy &UntiedCodeGen)
142         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
143     void Enter(CodeGenFunction &CGF) override {
144       if (Untied) {
145         // Emit task switching point.
146         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
147             CGF.GetAddrOfLocalVar(PartIDVar),
148             PartIDVar->getType()->castAs<PointerType>());
149         llvm::Value *Res =
150             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
151         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
152         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
153         CGF.EmitBlock(DoneBB);
154         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
155         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
156         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
157                               CGF.Builder.GetInsertBlock());
158         emitUntiedSwitch(CGF);
159       }
160     }
161     void emitUntiedSwitch(CodeGenFunction &CGF) const {
162       if (Untied) {
163         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
164             CGF.GetAddrOfLocalVar(PartIDVar),
165             PartIDVar->getType()->castAs<PointerType>());
166         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
167                               PartIdLVal);
168         UntiedCodeGen(CGF);
169         CodeGenFunction::JumpDest CurPoint =
170             CGF.getJumpDestInCurrentScope(".untied.next.");
171         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
172         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
173         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
174                               CGF.Builder.GetInsertBlock());
175         CGF.EmitBranchThroughCleanup(CurPoint);
176         CGF.EmitBlock(CurPoint.getBlock());
177       }
178     }
179     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
180   };
181   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
182                                  const VarDecl *ThreadIDVar,
183                                  const RegionCodeGenTy &CodeGen,
184                                  OpenMPDirectiveKind Kind, bool HasCancel,
185                                  const UntiedTaskActionTy &Action)
186       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
187         ThreadIDVar(ThreadIDVar), Action(Action) {
188     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
189   }
190 
191   /// Get a variable or parameter for storing global thread id
192   /// inside OpenMP construct.
193   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
194 
195   /// Get an LValue for the current ThreadID variable.
196   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
197 
198   /// Get the name of the capture helper.
199   StringRef getHelperName() const override { return ".omp_outlined."; }
200 
201   void emitUntiedSwitch(CodeGenFunction &CGF) override {
202     Action.emitUntiedSwitch(CGF);
203   }
204 
205   static bool classof(const CGCapturedStmtInfo *Info) {
206     return CGOpenMPRegionInfo::classof(Info) &&
207            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
208                TaskOutlinedRegion;
209   }
210 
211 private:
212   /// A variable or parameter storing global thread id for OpenMP
213   /// constructs.
214   const VarDecl *ThreadIDVar;
215   /// Action for emitting code for untied tasks.
216   const UntiedTaskActionTy &Action;
217 };
218 
219 /// API for inlined captured statement code generation in OpenMP
220 /// constructs.
221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
222 public:
223   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
224                             const RegionCodeGenTy &CodeGen,
225                             OpenMPDirectiveKind Kind, bool HasCancel)
226       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
227         OldCSI(OldCSI),
228         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
229 
230   // Retrieve the value of the context parameter.
231   llvm::Value *getContextValue() const override {
232     if (OuterRegionInfo)
233       return OuterRegionInfo->getContextValue();
234     llvm_unreachable("No context value for inlined OpenMP region");
235   }
236 
237   void setContextValue(llvm::Value *V) override {
238     if (OuterRegionInfo) {
239       OuterRegionInfo->setContextValue(V);
240       return;
241     }
242     llvm_unreachable("No context value for inlined OpenMP region");
243   }
244 
245   /// Lookup the captured field decl for a variable.
246   const FieldDecl *lookup(const VarDecl *VD) const override {
247     if (OuterRegionInfo)
248       return OuterRegionInfo->lookup(VD);
249     // If there is no outer outlined region,no need to lookup in a list of
250     // captured variables, we can use the original one.
251     return nullptr;
252   }
253 
254   FieldDecl *getThisFieldDecl() const override {
255     if (OuterRegionInfo)
256       return OuterRegionInfo->getThisFieldDecl();
257     return nullptr;
258   }
259 
260   /// Get a variable or parameter for storing global thread id
261   /// inside OpenMP construct.
262   const VarDecl *getThreadIDVariable() const override {
263     if (OuterRegionInfo)
264       return OuterRegionInfo->getThreadIDVariable();
265     return nullptr;
266   }
267 
268   /// Get an LValue for the current ThreadID variable.
269   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
270     if (OuterRegionInfo)
271       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
272     llvm_unreachable("No LValue for inlined OpenMP construct");
273   }
274 
275   /// Get the name of the capture helper.
276   StringRef getHelperName() const override {
277     if (auto *OuterRegionInfo = getOldCSI())
278       return OuterRegionInfo->getHelperName();
279     llvm_unreachable("No helper name for inlined OpenMP construct");
280   }
281 
282   void emitUntiedSwitch(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       OuterRegionInfo->emitUntiedSwitch(CGF);
285   }
286 
287   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
288 
289   static bool classof(const CGCapturedStmtInfo *Info) {
290     return CGOpenMPRegionInfo::classof(Info) &&
291            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
292   }
293 
294   ~CGOpenMPInlinedRegionInfo() override = default;
295 
296 private:
297   /// CodeGen info about outer OpenMP region.
298   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
299   CGOpenMPRegionInfo *OuterRegionInfo;
300 };
301 
302 /// API for captured statement code generation in OpenMP target
303 /// constructs. For this captures, implicit parameters are used instead of the
304 /// captured fields. The name of the target region has to be unique in a given
305 /// application so it is provided by the client, because only the client has
306 /// the information to generate that.
307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
308 public:
309   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
310                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
311       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
312                            /*HasCancel=*/false),
313         HelperName(HelperName) {}
314 
315   /// This is unused for target regions because each starts executing
316   /// with a single thread.
317   const VarDecl *getThreadIDVariable() const override { return nullptr; }
318 
319   /// Get the name of the capture helper.
320   StringRef getHelperName() const override { return HelperName; }
321 
322   static bool classof(const CGCapturedStmtInfo *Info) {
323     return CGOpenMPRegionInfo::classof(Info) &&
324            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
325   }
326 
327 private:
328   StringRef HelperName;
329 };
330 
331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
332   llvm_unreachable("No codegen for expressions");
333 }
334 /// API for generation of expressions captured in a innermost OpenMP
335 /// region.
336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
337 public:
338   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
339       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
340                                   OMPD_unknown,
341                                   /*HasCancel=*/false),
342         PrivScope(CGF) {
343     // Make sure the globals captured in the provided statement are local by
344     // using the privatization logic. We assume the same variable is not
345     // captured more than once.
346     for (const auto &C : CS.captures()) {
347       if (!C.capturesVariable() && !C.capturesVariableByCopy())
348         continue;
349 
350       const VarDecl *VD = C.getCapturedVar();
351       if (VD->isLocalVarDeclOrParm())
352         continue;
353 
354       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
355                       /*RefersToEnclosingVariableOrCapture=*/false,
356                       VD->getType().getNonReferenceType(), VK_LValue,
357                       C.getLocation());
358       PrivScope.addPrivate(
359           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
360     }
361     (void)PrivScope.Privatize();
362   }
363 
364   /// Lookup the captured field decl for a variable.
365   const FieldDecl *lookup(const VarDecl *VD) const override {
366     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
367       return FD;
368     return nullptr;
369   }
370 
371   /// Emit the captured statement body.
372   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
373     llvm_unreachable("No body for expressions");
374   }
375 
376   /// Get a variable or parameter for storing global thread id
377   /// inside OpenMP construct.
378   const VarDecl *getThreadIDVariable() const override {
379     llvm_unreachable("No thread id for expressions");
380   }
381 
382   /// Get the name of the capture helper.
383   StringRef getHelperName() const override {
384     llvm_unreachable("No helper name for expressions");
385   }
386 
387   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
388 
389 private:
390   /// Private scope to capture global variables.
391   CodeGenFunction::OMPPrivateScope PrivScope;
392 };
393 
394 /// RAII for emitting code of OpenMP constructs.
395 class InlinedOpenMPRegionRAII {
396   CodeGenFunction &CGF;
397   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
398   FieldDecl *LambdaThisCaptureField = nullptr;
399   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
400 
401 public:
402   /// Constructs region for combined constructs.
403   /// \param CodeGen Code generation sequence for combined directives. Includes
404   /// a list of functions used for code generation of implicitly inlined
405   /// regions.
406   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
407                           OpenMPDirectiveKind Kind, bool HasCancel)
408       : CGF(CGF) {
409     // Start emission for the construct.
410     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
411         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
412     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
413     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
414     CGF.LambdaThisCaptureField = nullptr;
415     BlockInfo = CGF.BlockInfo;
416     CGF.BlockInfo = nullptr;
417   }
418 
419   ~InlinedOpenMPRegionRAII() {
420     // Restore original CapturedStmtInfo only if we're done with code emission.
421     auto *OldCSI =
422         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
423     delete CGF.CapturedStmtInfo;
424     CGF.CapturedStmtInfo = OldCSI;
425     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
426     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
427     CGF.BlockInfo = BlockInfo;
428   }
429 };
430 
431 /// Values for bit flags used in the ident_t to describe the fields.
432 /// All enumeric elements are named and described in accordance with the code
433 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
434 enum OpenMPLocationFlags : unsigned {
435   /// Use trampoline for internal microtask.
436   OMP_IDENT_IMD = 0x01,
437   /// Use c-style ident structure.
438   OMP_IDENT_KMPC = 0x02,
439   /// Atomic reduction option for kmpc_reduce.
440   OMP_ATOMIC_REDUCE = 0x10,
441   /// Explicit 'barrier' directive.
442   OMP_IDENT_BARRIER_EXPL = 0x20,
443   /// Implicit barrier in code.
444   OMP_IDENT_BARRIER_IMPL = 0x40,
445   /// Implicit barrier in 'for' directive.
446   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
447   /// Implicit barrier in 'sections' directive.
448   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
449   /// Implicit barrier in 'single' directive.
450   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
451   /// Call of __kmp_for_static_init for static loop.
452   OMP_IDENT_WORK_LOOP = 0x200,
453   /// Call of __kmp_for_static_init for sections.
454   OMP_IDENT_WORK_SECTIONS = 0x400,
455   /// Call of __kmp_for_static_init for distribute.
456   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
457   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
458 };
459 
460 namespace {
461 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
462 /// Values for bit flags for marking which requires clauses have been used.
463 enum OpenMPOffloadingRequiresDirFlags : int64_t {
464   /// flag undefined.
465   OMP_REQ_UNDEFINED               = 0x000,
466   /// no requires clause present.
467   OMP_REQ_NONE                    = 0x001,
468   /// reverse_offload clause.
469   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
470   /// unified_address clause.
471   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
472   /// unified_shared_memory clause.
473   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
474   /// dynamic_allocators clause.
475   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
476   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
477 };
478 
479 enum OpenMPOffloadingReservedDeviceIDs {
480   /// Device ID if the device was not defined, runtime should get it
481   /// from environment variables in the spec.
482   OMP_DEVICEID_UNDEF = -1,
483 };
484 } // anonymous namespace
485 
/// Field indices of the ident structure, which describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// reserved_1: might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// flags: OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// reserved_2: not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// reserved_3: source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// psource: string describing the source location. The string is composed
  /// of semi-colon separated fields which describe the source file, the
  /// function and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
526 
/// Schedule kinds for 'omp for' loops. The enumerators mirror the enum
/// sched_type in kmp.h.
enum OpenMPSchedType {
  // Default (unordered) versions.
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// Static with chunk adjustment (e.g., simd).
  OMP_sch_static_balanced_chunked = 45,

  // 'ordered' versions.
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  /// The default schedule is the unordered static one.
  OMP_sch_default = OMP_sch_static,

  // dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
558 
559 enum OpenMPRTLFunction {
560   /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
561   /// kmpc_micro microtask, ...);
562   OMPRTL__kmpc_fork_call,
563   /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
564   /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
565   OMPRTL__kmpc_threadprivate_cached,
566   /// Call to void __kmpc_threadprivate_register( ident_t *,
567   /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
568   OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
570   OMPRTL__kmpc_global_thread_num,
571   // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
572   // kmp_critical_name *crit);
573   OMPRTL__kmpc_critical,
574   // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
575   // global_tid, kmp_critical_name *crit, uintptr_t hint);
576   OMPRTL__kmpc_critical_with_hint,
577   // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
578   // kmp_critical_name *crit);
579   OMPRTL__kmpc_end_critical,
580   // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
581   // global_tid);
582   OMPRTL__kmpc_cancel_barrier,
583   // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
584   OMPRTL__kmpc_barrier,
585   // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
586   OMPRTL__kmpc_for_static_fini,
587   // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
588   // global_tid);
589   OMPRTL__kmpc_serialized_parallel,
590   // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
591   // global_tid);
592   OMPRTL__kmpc_end_serialized_parallel,
593   // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
594   // kmp_int32 num_threads);
595   OMPRTL__kmpc_push_num_threads,
596   // Call to void __kmpc_flush(ident_t *loc);
597   OMPRTL__kmpc_flush,
598   // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
599   OMPRTL__kmpc_master,
600   // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
601   OMPRTL__kmpc_end_master,
602   // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
603   // int end_part);
604   OMPRTL__kmpc_omp_taskyield,
605   // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
606   OMPRTL__kmpc_single,
607   // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
608   OMPRTL__kmpc_end_single,
609   // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
610   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
611   // kmp_routine_entry_t *task_entry);
612   OMPRTL__kmpc_omp_task_alloc,
613   // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
614   // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
615   // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
616   // kmp_int64 device_id);
617   OMPRTL__kmpc_omp_target_task_alloc,
618   // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
619   // new_task);
620   OMPRTL__kmpc_omp_task,
621   // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
622   // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
623   // kmp_int32 didit);
624   OMPRTL__kmpc_copyprivate,
625   // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
626   // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
627   // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
628   OMPRTL__kmpc_reduce,
629   // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
630   // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
631   // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
632   // *lck);
633   OMPRTL__kmpc_reduce_nowait,
634   // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
635   // kmp_critical_name *lck);
636   OMPRTL__kmpc_end_reduce,
637   // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
638   // kmp_critical_name *lck);
639   OMPRTL__kmpc_end_reduce_nowait,
640   // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
641   // kmp_task_t * new_task);
642   OMPRTL__kmpc_omp_task_begin_if0,
643   // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
644   // kmp_task_t * new_task);
645   OMPRTL__kmpc_omp_task_complete_if0,
646   // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
647   OMPRTL__kmpc_ordered,
648   // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
649   OMPRTL__kmpc_end_ordered,
650   // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
651   // global_tid);
652   OMPRTL__kmpc_omp_taskwait,
653   // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
654   OMPRTL__kmpc_taskgroup,
655   // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
656   OMPRTL__kmpc_end_taskgroup,
657   // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
658   // int proc_bind);
659   OMPRTL__kmpc_push_proc_bind,
660   // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
661   // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
662   // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
663   OMPRTL__kmpc_omp_task_with_deps,
664   // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
665   // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
666   // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
667   OMPRTL__kmpc_omp_wait_deps,
668   // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
669   // global_tid, kmp_int32 cncl_kind);
670   OMPRTL__kmpc_cancellationpoint,
671   // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
672   // kmp_int32 cncl_kind);
673   OMPRTL__kmpc_cancel,
674   // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
675   // kmp_int32 num_teams, kmp_int32 thread_limit);
676   OMPRTL__kmpc_push_num_teams,
677   // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
678   // microtask, ...);
679   OMPRTL__kmpc_fork_teams,
680   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
681   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
682   // sched, kmp_uint64 grainsize, void *task_dup);
683   OMPRTL__kmpc_taskloop,
684   // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
685   // num_dims, struct kmp_dim *dims);
686   OMPRTL__kmpc_doacross_init,
687   // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
688   OMPRTL__kmpc_doacross_fini,
689   // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
690   // *vec);
691   OMPRTL__kmpc_doacross_post,
692   // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
693   // *vec);
694   OMPRTL__kmpc_doacross_wait,
695   // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
696   // *data);
697   OMPRTL__kmpc_task_reduction_init,
698   // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
699   // *d);
700   OMPRTL__kmpc_task_reduction_get_th_data,
701   // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
702   OMPRTL__kmpc_alloc,
703   // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
704   OMPRTL__kmpc_free,
705 
706   //
707   // Offloading related calls
708   //
709   // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
710   // size);
711   OMPRTL__kmpc_push_target_tripcount,
712   // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
713   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
714   // *arg_types);
715   OMPRTL__tgt_target,
716   // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
717   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
718   // *arg_types);
719   OMPRTL__tgt_target_nowait,
720   // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
721   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
722   // *arg_types, int32_t num_teams, int32_t thread_limit);
723   OMPRTL__tgt_target_teams,
724   // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
725   // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
726   // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
727   OMPRTL__tgt_target_teams_nowait,
728   // Call to void __tgt_register_requires(int64_t flags);
729   OMPRTL__tgt_register_requires,
730   // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
731   OMPRTL__tgt_register_lib,
732   // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
733   OMPRTL__tgt_unregister_lib,
734   // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
735   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
736   OMPRTL__tgt_target_data_begin,
737   // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
738   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
739   // *arg_types);
740   OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
743   OMPRTL__tgt_target_data_end,
744   // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
745   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
746   // *arg_types);
747   OMPRTL__tgt_target_data_end_nowait,
748   // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
749   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
750   OMPRTL__tgt_target_data_update,
751   // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
752   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
753   // *arg_types);
754   OMPRTL__tgt_target_data_update_nowait,
755   // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
756   OMPRTL__tgt_mapper_num_components,
757   // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
758   // *base, void *begin, int64_t size, int64_t type);
759   OMPRTL__tgt_push_mapper_component,
760 };
761 
762 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
763 /// region.
764 class CleanupTy final : public EHScopeStack::Cleanup {
765   PrePostActionTy *Action;
766 
767 public:
768   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
769   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
770     if (!CGF.HaveInsertPoint())
771       return;
772     Action->Exit(CGF);
773   }
774 };
775 
776 } // anonymous namespace
777 
778 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
779   CodeGenFunction::RunCleanupsScope Scope(CGF);
780   if (PrePostAction) {
781     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
782     Callback(CodeGen, CGF, *PrePostAction);
783   } else {
784     PrePostActionTy Action;
785     Callback(CodeGen, CGF, Action);
786   }
787 }
788 
789 /// Check if the combiner is a call to UDR combiner and if it is so return the
790 /// UDR decl used for reduction.
791 static const OMPDeclareReductionDecl *
792 getReductionInit(const Expr *ReductionOp) {
793   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
794     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
795       if (const auto *DRE =
796               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
797         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
798           return DRD;
799   return nullptr;
800 }
801 
802 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
803                                              const OMPDeclareReductionDecl *DRD,
804                                              const Expr *InitOp,
805                                              Address Private, Address Original,
806                                              QualType Ty) {
807   if (DRD->getInitializer()) {
808     std::pair<llvm::Function *, llvm::Function *> Reduction =
809         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
810     const auto *CE = cast<CallExpr>(InitOp);
811     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
812     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
813     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
814     const auto *LHSDRE =
815         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
816     const auto *RHSDRE =
817         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
818     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
819     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
820                             [=]() { return Private; });
821     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
822                             [=]() { return Original; });
823     (void)PrivateScope.Privatize();
824     RValue Func = RValue::get(Reduction.second);
825     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
826     CGF.EmitIgnoredExpr(InitOp);
827   } else {
828     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
829     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
830     auto *GV = new llvm::GlobalVariable(
831         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
832         llvm::GlobalValue::PrivateLinkage, Init, Name);
833     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
834     RValue InitRVal;
835     switch (CGF.getEvaluationKind(Ty)) {
836     case TEK_Scalar:
837       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
838       break;
839     case TEK_Complex:
840       InitRVal =
841           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
842       break;
843     case TEK_Aggregate:
844       InitRVal = RValue::getAggregate(LV.getAddress());
845       break;
846     }
847     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
848     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
849     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
850                          /*IsInitializer=*/false);
851   }
852 }
853 
/// Emit initialization of arrays of complex types.
///
/// Emits a loop that visits every base element of the destination array and
/// initializes it, either through emitInitWithReductionInitializer() or by
/// evaluating \p Init directly into the element.
/// \param CGF Code generation state used to emit the loop.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized with
/// emitInitWithReductionInitializer(); otherwise \p Init is emitted into the
/// element with EmitAnyExprToMem().
/// \param Init Initial expression of array.
/// \param DRD Declare reduction construct, or null. When non-null, the array
/// at \p SrcAddr is walked element by element together with the destination.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  // NOTE(review): this casts DestAddr to its own element type, which looks
  // like a no-op - confirm before relying on it.
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  // The source array is only touched when a declare reduction is involved.
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely when the array has no elements.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI node tracking the current source element (declare reduction only).
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  // PHI node tracking the current destination element.
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Cleanups created while initializing one element are run before the loop
    // advances to the next element.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the source-side GEP reuses the "dest.element" value name.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
942 
943 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
944   return CGF.EmitOMPSharedLValue(E);
945 }
946 
947 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
948                                             const Expr *E) {
949   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
950     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
951   return LValue();
952 }
953 
954 void ReductionCodeGen::emitAggregateInitialization(
955     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
956     const OMPDeclareReductionDecl *DRD) {
957   // Emit VarDecl with copy init for arrays.
958   // Get the address of the original variable captured in current
959   // captured region.
960   const auto *PrivateVD =
961       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
962   bool EmitDeclareReductionInit =
963       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
964   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
965                        EmitDeclareReductionInit,
966                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
967                                                 : PrivateVD->getInit(),
968                        DRD, SharedLVal.getAddress());
969 }
970 
971 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
972                                    ArrayRef<const Expr *> Privates,
973                                    ArrayRef<const Expr *> ReductionOps) {
974   ClausesData.reserve(Shareds.size());
975   SharedAddresses.reserve(Shareds.size());
976   Sizes.reserve(Shareds.size());
977   BaseDecls.reserve(Shareds.size());
978   auto IPriv = Privates.begin();
979   auto IRed = ReductionOps.begin();
980   for (const Expr *Ref : Shareds) {
981     ClausesData.emplace_back(Ref, *IPriv, *IRed);
982     std::advance(IPriv, 1);
983     std::advance(IRed, 1);
984   }
985 }
986 
987 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
988   assert(SharedAddresses.size() == N &&
989          "Number of generated lvalues must be exactly N.");
990   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
991   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
992   SharedAddresses.emplace_back(First, Second);
993 }
994 
995 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
996   const auto *PrivateVD =
997       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
998   QualType PrivateType = PrivateVD->getType();
999   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
1000   if (!PrivateType->isVariablyModifiedType()) {
1001     Sizes.emplace_back(
1002         CGF.getTypeSize(
1003             SharedAddresses[N].first.getType().getNonReferenceType()),
1004         nullptr);
1005     return;
1006   }
1007   llvm::Value *Size;
1008   llvm::Value *SizeInChars;
1009   auto *ElemType =
1010       cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
1011           ->getElementType();
1012   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
1013   if (AsArraySection) {
1014     Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
1015                                      SharedAddresses[N].first.getPointer());
1016     Size = CGF.Builder.CreateNUWAdd(
1017         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
1018     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
1019   } else {
1020     SizeInChars = CGF.getTypeSize(
1021         SharedAddresses[N].first.getType().getNonReferenceType());
1022     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
1023   }
1024   Sizes.emplace_back(SizeInChars, Size);
1025   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1026       CGF,
1027       cast<OpaqueValueExpr>(
1028           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1029       RValue::get(Size));
1030   CGF.EmitVariablyModifiedType(PrivateType);
1031 }
1032 
1033 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
1034                                          llvm::Value *Size) {
1035   const auto *PrivateVD =
1036       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1037   QualType PrivateType = PrivateVD->getType();
1038   if (!PrivateType->isVariablyModifiedType()) {
1039     assert(!Size && !Sizes[N].second &&
1040            "Size should be nullptr for non-variably modified reduction "
1041            "items.");
1042     return;
1043   }
1044   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1045       CGF,
1046       cast<OpaqueValueExpr>(
1047           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1048       RValue::get(Size));
1049   CGF.EmitVariablyModifiedType(PrivateType);
1050 }
1051 
1052 void ReductionCodeGen::emitInitialization(
1053     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1054     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1055   assert(SharedAddresses.size() > N && "No variable was generated");
1056   const auto *PrivateVD =
1057       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1058   const OMPDeclareReductionDecl *DRD =
1059       getReductionInit(ClausesData[N].ReductionOp);
1060   QualType PrivateType = PrivateVD->getType();
1061   PrivateAddr = CGF.Builder.CreateElementBitCast(
1062       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1063   QualType SharedType = SharedAddresses[N].first.getType();
1064   SharedLVal = CGF.MakeAddrLValue(
1065       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
1066                                        CGF.ConvertTypeForMem(SharedType)),
1067       SharedType, SharedAddresses[N].first.getBaseInfo(),
1068       CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1069   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1070     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1071   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1072     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1073                                      PrivateAddr, SharedLVal.getAddress(),
1074                                      SharedLVal.getType());
1075   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1076              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1077     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1078                          PrivateVD->getType().getQualifiers(),
1079                          /*IsInitializer=*/false);
1080   }
1081 }
1082 
1083 bool ReductionCodeGen::needCleanups(unsigned N) {
1084   const auto *PrivateVD =
1085       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1086   QualType PrivateType = PrivateVD->getType();
1087   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1088   return DTorKind != QualType::DK_none;
1089 }
1090 
1091 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1092                                     Address PrivateAddr) {
1093   const auto *PrivateVD =
1094       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1095   QualType PrivateType = PrivateVD->getType();
1096   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1097   if (needCleanups(N)) {
1098     PrivateAddr = CGF.Builder.CreateElementBitCast(
1099         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1100     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1101   }
1102 }
1103 
/// Follows the pointer/reference levels of a base variable and returns an
/// lvalue for what they finally refer to.
///
/// Starting from \p BaseLV, one load is emitted for every pointer or reference
/// level of \p BaseTy, stopping when the type is neither a pointer nor a
/// reference or is the same type as \p ElTy. The address of the resulting
/// lvalue is then cast to the memory type of \p ElTy.
/// \param CGF Code generation state.
/// \param BaseTy Type of the base variable; a top-level reference is stripped
/// before the walk starts.
/// \param ElTy Type at which the walk stops.
/// \param BaseLV Lvalue of the base variable.
/// \return Lvalue with the cast address that keeps the type, base info and
/// TBAA info of the last loaded lvalue.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      // Pointer level: load the pointer and continue from its pointee.
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      // Reference level: load through the reference.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
1123 
/// Wraps \p Addr in as many levels of indirection as the base variable type
/// has, so that the result can stand in for the base variable.
///
/// For every pointer or reference level of \p BaseTy (stopping when the type
/// is neither, or is the same type as \p ElTy) a memory temporary is created
/// and its pointer is stored into the temporary of the previous level.
/// \p Addr is cast to the element type of the innermost temporary and stored
/// there. With no such levels, \p Addr is cast to \p BaseLVType instead.
/// \param CGF Code generation state.
/// \param BaseTy Type of the base variable; a top-level reference is stripped
/// first.
/// \param ElTy Type at which the walk stops.
/// \param BaseLVType Type to cast \p Addr to when no temporary is created.
/// \param BaseLVAlignment Alignment of the returned address when no temporary
/// is created.
/// \param Addr Pointer to wrap.
/// \return The outermost temporary if any was created, otherwise the cast
/// \p Addr with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Tmp: temporary created last; TopTmp: temporary of the previous level;
  // MostTopTmp: temporary created first (the one returned to the caller).
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    // Chain the new temporary into the previous level, or remember it as the
    // outermost one.
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the pointer into the innermost temporary and hand back the
    // outermost one.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
1151 
1152 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1153   const VarDecl *OrigVD = nullptr;
1154   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1155     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1156     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1157       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1158     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1159       Base = TempASE->getBase()->IgnoreParenImpCasts();
1160     DE = cast<DeclRefExpr>(Base);
1161     OrigVD = cast<VarDecl>(DE->getDecl());
1162   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1163     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1164     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1165       Base = TempASE->getBase()->IgnoreParenImpCasts();
1166     DE = cast<DeclRefExpr>(Base);
1167     OrigVD = cast<VarDecl>(DE->getDecl());
1168   }
1169   return OrigVD;
1170 }
1171 
1172 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1173                                                Address PrivateAddr) {
1174   const DeclRefExpr *DE;
1175   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1176     BaseDecls.emplace_back(OrigVD);
1177     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1178     LValue BaseLValue =
1179         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1180                     OriginalBaseLValue);
1181     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1182         BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
1183     llvm::Value *PrivatePointer =
1184         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1185             PrivateAddr.getPointer(),
1186             SharedAddresses[N].first.getAddress().getType());
1187     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1188     return castToBase(CGF, OrigVD->getType(),
1189                       SharedAddresses[N].first.getType(),
1190                       OriginalBaseLValue.getAddress().getType(),
1191                       OriginalBaseLValue.getAlignment(), Ptr);
1192   }
1193   BaseDecls.emplace_back(
1194       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1195   return PrivateAddr;
1196 }
1197 
1198 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1199   const OMPDeclareReductionDecl *DRD =
1200       getReductionInit(ClausesData[N].ReductionOp);
1201   return DRD && DRD->getInitializer();
1202 }
1203 
1204 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1205   return CGF.EmitLoadOfPointerLValue(
1206       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1207       getThreadIDVariable()->getType()->castAs<PointerType>());
1208 }
1209 
1210 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1211   if (!CGF.HaveInsertPoint())
1212     return;
1213   // 1.2.2 OpenMP Language Terminology
1214   // Structured block - An executable statement with a single entry at the
1215   // top and a single exit at the bottom.
1216   // The point of exit cannot be a branch out of the structured block.
1217   // longjmp() and throw() must not violate the entry/exit criteria.
1218   CGF.EHStack.pushTerminate();
1219   CodeGen(CGF);
1220   CGF.EHStack.popTerminate();
1221 }
1222 
1223 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1224     CodeGenFunction &CGF) {
1225   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1226                             getThreadIDVariable()->getType(),
1227                             AlignmentSource::Decl);
1228 }
1229 
1230 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1231                                        QualType FieldTy) {
1232   auto *Field = FieldDecl::Create(
1233       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1234       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1235       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1236   Field->setAccess(AS_public);
1237   DC->addDecl(Field);
1238   return Field;
1239 }
1240 
1241 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1242                                  StringRef Separator)
1243     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1244       OffloadEntriesInfoManager(CGM) {
1245   ASTContext &C = CGM.getContext();
1246   RecordDecl *RD = C.buildImplicitRecord("ident_t");
1247   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1248   RD->startDefinition();
1249   // reserved_1
1250   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1251   // flags
1252   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1253   // reserved_2
1254   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1255   // reserved_3
1256   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1257   // psource
1258   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1259   RD->completeDefinition();
1260   IdentQTy = C.getRecordType(RD);
1261   IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1262   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1263 
1264   loadOffloadInfoMetadata();
1265 }
1266 
1267 bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD,
1268                                             const GlobalDecl &OldGD,
1269                                             llvm::GlobalValue *OrigAddr,
1270                                             bool IsForDefinition) {
1271   // Emit at least a definition for the aliasee if the the address of the
1272   // original function is requested.
1273   if (IsForDefinition || OrigAddr)
1274     (void)CGM.GetAddrOfGlobal(NewGD);
1275   StringRef NewMangledName = CGM.getMangledName(NewGD);
1276   llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName);
1277   if (Addr && !Addr->isDeclaration()) {
1278     const auto *D = cast<FunctionDecl>(OldGD.getDecl());
1279     const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(OldGD);
1280     llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI);
1281 
1282     // Create a reference to the named value.  This ensures that it is emitted
1283     // if a deferred decl.
1284     llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD);
1285 
1286     // Create the new alias itself, but don't set a name yet.
1287     auto *GA =
1288         llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule());
1289 
1290     if (OrigAddr) {
1291       assert(OrigAddr->isDeclaration() && "Expected declaration");
1292 
1293       GA->takeName(OrigAddr);
1294       OrigAddr->replaceAllUsesWith(
1295           llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType()));
1296       OrigAddr->eraseFromParent();
1297     } else {
1298       GA->setName(CGM.getMangledName(OldGD));
1299     }
1300 
1301     // Set attributes which are particular to an alias; this is a
1302     // specialization of the attributes which may be set on a global function.
1303     if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() ||
1304         D->isWeakImported())
1305       GA->setLinkage(llvm::Function::WeakAnyLinkage);
1306 
1307     CGM.SetCommonAttributes(OldGD, GA);
1308     return true;
1309   }
1310   return false;
1311 }
1312 
1313 void CGOpenMPRuntime::clear() {
1314   InternalVars.clear();
1315   // Clean non-target variable declarations possibly used only in debug info.
1316   for (const auto &Data : EmittedNonTargetVariables) {
1317     if (!Data.getValue().pointsToAliveValue())
1318       continue;
1319     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1320     if (!GV)
1321       continue;
1322     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1323       continue;
1324     GV->eraseFromParent();
1325   }
1326   // Emit aliases for the deferred aliasees.
1327   for (const auto &Pair : DeferredVariantFunction) {
1328     StringRef MangledName = CGM.getMangledName(Pair.second.second);
1329     llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName);
1330     // If not able to emit alias, just emit original declaration.
1331     (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr,
1332                                 /*IsForDefinition=*/false);
1333   }
1334 }
1335 
1336 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1337   SmallString<128> Buffer;
1338   llvm::raw_svector_ostream OS(Buffer);
1339   StringRef Sep = FirstSeparator;
1340   for (StringRef Part : Parts) {
1341     OS << Sep << Part;
1342     Sep = Separator;
1343   }
1344   return OS.str();
1345 }
1346 
/// Emits the helper function for the combiner or the initializer of a
/// declare reduction construct.
///
/// The function has internal linkage, returns void and takes two restrict
/// qualified pointers to \p Ty. Inside the function \p In and \p Out are
/// mapped to the pointees of the corresponding parameters.
/// \param CGM Module the function is emitted into.
/// \param Ty Type of the reduction item.
/// \param CombinerInitializer Expression emitted as the function body; may be
/// null, in which case no expression is emitted.
/// \param In Variable mapped to the pointee of the second parameter.
/// \param Out Variable mapped to the pointee of the first parameter.
/// \param IsCombiner True to emit a combiner ("omp_combiner"), false to emit
/// an initializer ("omp_initializer"). For an initializer, a non-trivial
/// initializer of \p Out is emitted before \p CombinerInitializer.
/// \return The emitted function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  // Note the parameter order: the out parameter is pushed first.
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The trailing empty part makes getName() append one more separator.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, request inlining of the helper at its call sites.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // Initializer only: run the non-trivial initializer of the out variable
  // into the storage it is now mapped to.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1403 
1404 void CGOpenMPRuntime::emitUserDefinedReduction(
1405     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1406   if (UDRMap.count(D) > 0)
1407     return;
1408   llvm::Function *Combiner = emitCombinerOrInitializer(
1409       CGM, D->getType(), D->getCombiner(),
1410       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1411       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1412       /*IsCombiner=*/true);
1413   llvm::Function *Initializer = nullptr;
1414   if (const Expr *Init = D->getInitializer()) {
1415     Initializer = emitCombinerOrInitializer(
1416         CGM, D->getType(),
1417         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1418                                                                      : nullptr,
1419         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1420         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1421         /*IsCombiner=*/false);
1422   }
1423   UDRMap.try_emplace(D, Combiner, Initializer);
1424   if (CGF) {
1425     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1426     Decls.second.push_back(D);
1427   }
1428 }
1429 
1430 std::pair<llvm::Function *, llvm::Function *>
1431 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1432   auto I = UDRMap.find(D);
1433   if (I != UDRMap.end())
1434     return I->second;
1435   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1436   return UDRMap.lookup(D);
1437 }
1438 
1439 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1440     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1441     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1442     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1443   assert(ThreadIDVar->getType()->isPointerType() &&
1444          "thread id variable must be of type kmp_int32 *");
1445   CodeGenFunction CGF(CGM, true);
1446   bool HasCancel = false;
1447   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1448     HasCancel = OPD->hasCancel();
1449   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1450     HasCancel = OPSD->hasCancel();
1451   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1452     HasCancel = OPFD->hasCancel();
1453   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1454     HasCancel = OPFD->hasCancel();
1455   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1456     HasCancel = OPFD->hasCancel();
1457   else if (const auto *OPFD =
1458                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1459     HasCancel = OPFD->hasCancel();
1460   else if (const auto *OPFD =
1461                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1462     HasCancel = OPFD->hasCancel();
1463   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1464                                     HasCancel, OutlinedHelperName);
1465   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1466   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1467 }
1468 
1469 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1470     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1471     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1472   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1473   return emitParallelOrTeamsOutlinedFunction(
1474       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1475 }
1476 
1477 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1478     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1479     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1480   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1481   return emitParallelOrTeamsOutlinedFunction(
1482       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1483 }
1484 
1485 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1486     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1487     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1488     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1489     bool Tied, unsigned &NumberOfParts) {
1490   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1491                                               PrePostActionTy &) {
1492     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1493     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1494     llvm::Value *TaskArgs[] = {
1495         UpLoc, ThreadID,
1496         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1497                                     TaskTVar->getType()->castAs<PointerType>())
1498             .getPointer()};
1499     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1500   };
1501   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1502                                                             UntiedCodeGen);
1503   CodeGen.setAction(Action);
1504   assert(!ThreadIDVar->getType()->isPointerType() &&
1505          "thread id variable must be of type kmp_int32 for tasks");
1506   const OpenMPDirectiveKind Region =
1507       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1508                                                       : OMPD_task;
1509   const CapturedStmt *CS = D.getCapturedStmt(Region);
1510   const auto *TD = dyn_cast<OMPTaskDirective>(&D);
1511   CodeGenFunction CGF(CGM, true);
1512   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1513                                         InnermostKind,
1514                                         TD ? TD->hasCancel() : false, Action);
1515   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1516   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1517   if (!Tied)
1518     NumberOfParts = Action.getNumberOfParts();
1519   return Res;
1520 }
1521 
1522 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1523                              const RecordDecl *RD, const CGRecordLayout &RL,
1524                              ArrayRef<llvm::Constant *> Data) {
1525   llvm::StructType *StructTy = RL.getLLVMType();
1526   unsigned PrevIdx = 0;
1527   ConstantInitBuilder CIBuilder(CGM);
1528   auto DI = Data.begin();
1529   for (const FieldDecl *FD : RD->fields()) {
1530     unsigned Idx = RL.getLLVMFieldNo(FD);
1531     // Fill the alignment.
1532     for (unsigned I = PrevIdx; I < Idx; ++I)
1533       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1534     PrevIdx = Idx + 1;
1535     Fields.add(*DI);
1536     ++DI;
1537   }
1538 }
1539 
1540 template <class... As>
1541 static llvm::GlobalVariable *
1542 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1543                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1544                    As &&... Args) {
1545   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1546   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1547   ConstantInitBuilder CIBuilder(CGM);
1548   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1549   buildStructValue(Fields, CGM, RD, RL, Data);
1550   return Fields.finishAndCreateGlobal(
1551       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1552       std::forward<As>(Args)...);
1553 }
1554 
1555 template <typename T>
1556 static void
1557 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1558                                          ArrayRef<llvm::Constant *> Data,
1559                                          T &Parent) {
1560   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1561   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1562   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1563   buildStructValue(Fields, CGM, RD, RL, Data);
1564   Fields.finishAndAddTo(Parent);
1565 }
1566 
1567 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1568   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1569   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1570   FlagsTy FlagsKey(Flags, Reserved2Flags);
1571   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1572   if (!Entry) {
1573     if (!DefaultOpenMPPSource) {
1574       // Initialize default location for psource field of ident_t structure of
1575       // all ident_t objects. Format is ";file;function;line;column;;".
1576       // Taken from
1577       // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1578       DefaultOpenMPPSource =
1579           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1580       DefaultOpenMPPSource =
1581           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1582     }
1583 
1584     llvm::Constant *Data[] = {
1585         llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1586         llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1587         llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1588         llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1589     llvm::GlobalValue *DefaultOpenMPLocation =
1590         createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1591                            llvm::GlobalValue::PrivateLinkage);
1592     DefaultOpenMPLocation->setUnnamedAddr(
1593         llvm::GlobalValue::UnnamedAddr::Global);
1594 
1595     OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1596   }
1597   return Address(Entry, Align);
1598 }
1599 
1600 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1601                                              bool AtCurrentPoint) {
1602   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1603   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1604 
1605   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1606   if (AtCurrentPoint) {
1607     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1608         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1609   } else {
1610     Elem.second.ServiceInsertPt =
1611         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1612     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1613   }
1614 }
1615 
1616 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1617   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1618   if (Elem.second.ServiceInsertPt) {
1619     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1620     Elem.second.ServiceInsertPt = nullptr;
1621     Ptr->eraseFromParent();
1622   }
1623 }
1624 
/// Returns a pointer to an ident_t object describing the source location
/// \p Loc, for passing to OpenMP runtime calls.
///
/// OMP_IDENT_KMPC is always added to \p Flags. When debug info is disabled or
/// \p Loc is invalid, the shared default location global for these flags is
/// returned. Otherwise a per-function ident_t temporary (".kmpc_loc.addr") is
/// used: it is created on first use, initialized by a memcpy from the default
/// location emitted at the function's service insertion point, and its
/// psource field is then overwritten at the current insertion point with a
/// ";file;function;line;column;;" string built from \p Loc.
///
/// NOTE(review): \p Flags only reach the temporary through the initial
/// memcpy; later calls in the same function with different flags reuse the
/// existing temporary as is - confirm this is intended.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  // Reuse the ident_t temporary already recorded for this function, if any.
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // getThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Emit the initializing memcpy at the service insertion point; the guard
    // restores the builder's previous insertion point when this scope ends.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // The location string is cached per raw source location encoding.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    // The function component stays empty when the current declaration is
    // absent or is not a FunctionDecl.
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1685 
/// Returns a value holding the OpenMP global thread id for use in the current
/// function.
///
/// The lookup proceeds in three steps:
///  1. A thread id already cached for CGF.CurFn in OpenMPLocThreadIDMap is
///     returned as is.
///  2. Inside an OpenMP region that has a thread id variable, the id is
///     loaded from that variable when the conditions below allow it; the load
///     is cached only if it was emitted in the block containing
///     CGF.AllocaInsertPt.
///  3. Otherwise a call to __kmpc_global_thread_num is emitted at the
///     function's service insertion point and cached.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load from the variable if no landing pad is required, exceptions or
      // C++ exceptions are disabled, we are emitting into the top block, or
      // the variable's pointer is not an instruction or is an instruction
      // located in the top block or in the current insertion block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer()) ||
          cast<llvm::Instruction>(LVal.getPointer())->getParent() == TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer())->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // The thread id could not be loaded from a region variable - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insertion point; the guard restores the
  // builder's previous insertion point on return.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1741 
1742 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1743   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1744   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1745     clearLocThreadIdInsertPt(CGF);
1746     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1747   }
1748   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1749     for(auto *D : FunctionUDRMap[CGF.CurFn])
1750       UDRMap.erase(D);
1751     FunctionUDRMap.erase(CGF.CurFn);
1752   }
1753   auto I = FunctionUDMMap.find(CGF.CurFn);
1754   if (I != FunctionUDMMap.end()) {
1755     for(auto *D : I->second)
1756       UDMMap.erase(D);
1757     FunctionUDMMap.erase(I);
1758   }
1759 }
1760 
1761 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1762   return IdentTy->getPointerTo();
1763 }
1764 
1765 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1766   if (!Kmpc_MicroTy) {
1767     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1768     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1769                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1770     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1771   }
1772   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1773 }
1774 
1775 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1776   llvm::FunctionCallee RTLFn = nullptr;
1777   switch (static_cast<OpenMPRTLFunction>(Function)) {
1778   case OMPRTL__kmpc_fork_call: {
1779     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1780     // microtask, ...);
1781     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1782                                 getKmpc_MicroPointerTy()};
1783     auto *FnTy =
1784         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1785     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1786     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1787       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1788         llvm::LLVMContext &Ctx = F->getContext();
1789         llvm::MDBuilder MDB(Ctx);
1790         // Annotate the callback behavior of the __kmpc_fork_call:
1791         //  - The callback callee is argument number 2 (microtask).
1792         //  - The first two arguments of the callback callee are unknown (-1).
1793         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1794         //    callback callee.
1795         F->addMetadata(
1796             llvm::LLVMContext::MD_callback,
1797             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1798                                         2, {-1, -1},
1799                                         /* VarArgsArePassed */ true)}));
1800       }
1801     }
1802     break;
1803   }
1804   case OMPRTL__kmpc_global_thread_num: {
1805     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1806     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1807     auto *FnTy =
1808         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1809     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1810     break;
1811   }
1812   case OMPRTL__kmpc_threadprivate_cached: {
1813     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1814     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1815     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1816                                 CGM.VoidPtrTy, CGM.SizeTy,
1817                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1818     auto *FnTy =
1819         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1820     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1821     break;
1822   }
1823   case OMPRTL__kmpc_critical: {
1824     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1825     // kmp_critical_name *crit);
1826     llvm::Type *TypeParams[] = {
1827         getIdentTyPointerTy(), CGM.Int32Ty,
1828         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1829     auto *FnTy =
1830         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1831     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1832     break;
1833   }
1834   case OMPRTL__kmpc_critical_with_hint: {
1835     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1836     // kmp_critical_name *crit, uintptr_t hint);
1837     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1838                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1839                                 CGM.IntPtrTy};
1840     auto *FnTy =
1841         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1842     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1843     break;
1844   }
1845   case OMPRTL__kmpc_threadprivate_register: {
1846     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1847     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1848     // typedef void *(*kmpc_ctor)(void *);
1849     auto *KmpcCtorTy =
1850         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1851                                 /*isVarArg*/ false)->getPointerTo();
1852     // typedef void *(*kmpc_cctor)(void *, void *);
1853     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1854     auto *KmpcCopyCtorTy =
1855         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1856                                 /*isVarArg*/ false)
1857             ->getPointerTo();
1858     // typedef void (*kmpc_dtor)(void *);
1859     auto *KmpcDtorTy =
1860         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1861             ->getPointerTo();
1862     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1863                               KmpcCopyCtorTy, KmpcDtorTy};
1864     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1865                                         /*isVarArg*/ false);
1866     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1867     break;
1868   }
1869   case OMPRTL__kmpc_end_critical: {
1870     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1871     // kmp_critical_name *crit);
1872     llvm::Type *TypeParams[] = {
1873         getIdentTyPointerTy(), CGM.Int32Ty,
1874         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1875     auto *FnTy =
1876         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1877     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1878     break;
1879   }
1880   case OMPRTL__kmpc_cancel_barrier: {
1881     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1882     // global_tid);
1883     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1884     auto *FnTy =
1885         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1886     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1887     break;
1888   }
1889   case OMPRTL__kmpc_barrier: {
1890     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1891     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1892     auto *FnTy =
1893         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1894     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1895     break;
1896   }
1897   case OMPRTL__kmpc_for_static_fini: {
1898     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1899     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1900     auto *FnTy =
1901         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1902     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1903     break;
1904   }
1905   case OMPRTL__kmpc_push_num_threads: {
1906     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1907     // kmp_int32 num_threads)
1908     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1909                                 CGM.Int32Ty};
1910     auto *FnTy =
1911         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1912     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1913     break;
1914   }
1915   case OMPRTL__kmpc_serialized_parallel: {
1916     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1917     // global_tid);
1918     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1919     auto *FnTy =
1920         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1921     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1922     break;
1923   }
1924   case OMPRTL__kmpc_end_serialized_parallel: {
1925     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1926     // global_tid);
1927     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1928     auto *FnTy =
1929         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1930     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1931     break;
1932   }
1933   case OMPRTL__kmpc_flush: {
1934     // Build void __kmpc_flush(ident_t *loc);
1935     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1936     auto *FnTy =
1937         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1938     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1939     break;
1940   }
1941   case OMPRTL__kmpc_master: {
1942     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1943     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1944     auto *FnTy =
1945         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1946     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1947     break;
1948   }
1949   case OMPRTL__kmpc_end_master: {
1950     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1951     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1952     auto *FnTy =
1953         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1954     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1955     break;
1956   }
1957   case OMPRTL__kmpc_omp_taskyield: {
1958     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1959     // int end_part);
1960     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1961     auto *FnTy =
1962         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1963     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1964     break;
1965   }
1966   case OMPRTL__kmpc_single: {
1967     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1968     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1969     auto *FnTy =
1970         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1971     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1972     break;
1973   }
1974   case OMPRTL__kmpc_end_single: {
1975     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1976     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1977     auto *FnTy =
1978         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1979     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1980     break;
1981   }
1982   case OMPRTL__kmpc_omp_task_alloc: {
1983     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1984     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1985     // kmp_routine_entry_t *task_entry);
1986     assert(KmpRoutineEntryPtrTy != nullptr &&
1987            "Type kmp_routine_entry_t must be created.");
1988     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1989                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1990     // Return void * and then cast to particular kmp_task_t type.
1991     auto *FnTy =
1992         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1993     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1994     break;
1995   }
1996   case OMPRTL__kmpc_omp_target_task_alloc: {
1997     // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
1998     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1999     // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
2000     assert(KmpRoutineEntryPtrTy != nullptr &&
2001            "Type kmp_routine_entry_t must be created.");
2002     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2003                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
2004                                 CGM.Int64Ty};
2005     // Return void * and then cast to particular kmp_task_t type.
2006     auto *FnTy =
2007         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2008     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
2009     break;
2010   }
2011   case OMPRTL__kmpc_omp_task: {
2012     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2013     // *new_task);
2014     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2015                                 CGM.VoidPtrTy};
2016     auto *FnTy =
2017         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2018     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
2019     break;
2020   }
2021   case OMPRTL__kmpc_copyprivate: {
2022     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
2023     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
2024     // kmp_int32 didit);
2025     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2026     auto *CpyFnTy =
2027         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
2028     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
2029                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
2030                                 CGM.Int32Ty};
2031     auto *FnTy =
2032         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2033     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
2034     break;
2035   }
2036   case OMPRTL__kmpc_reduce: {
2037     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
2038     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
2039     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
2040     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2041     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2042                                                /*isVarArg=*/false);
2043     llvm::Type *TypeParams[] = {
2044         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2045         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2046         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2047     auto *FnTy =
2048         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2049     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
2050     break;
2051   }
2052   case OMPRTL__kmpc_reduce_nowait: {
2053     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
2054     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2055     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
2056     // *lck);
2057     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2058     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2059                                                /*isVarArg=*/false);
2060     llvm::Type *TypeParams[] = {
2061         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2062         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2063         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2064     auto *FnTy =
2065         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2066     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
2067     break;
2068   }
2069   case OMPRTL__kmpc_end_reduce: {
2070     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2071     // kmp_critical_name *lck);
2072     llvm::Type *TypeParams[] = {
2073         getIdentTyPointerTy(), CGM.Int32Ty,
2074         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2075     auto *FnTy =
2076         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2077     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2078     break;
2079   }
2080   case OMPRTL__kmpc_end_reduce_nowait: {
    // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
2083     llvm::Type *TypeParams[] = {
2084         getIdentTyPointerTy(), CGM.Int32Ty,
2085         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2086     auto *FnTy =
2087         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2088     RTLFn =
2089         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2090     break;
2091   }
2092   case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
2095     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2096                                 CGM.VoidPtrTy};
2097     auto *FnTy =
2098         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2099     RTLFn =
2100         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2101     break;
2102   }
2103   case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
2106     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2107                                 CGM.VoidPtrTy};
2108     auto *FnTy =
2109         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2110     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2111                                       /*Name=*/"__kmpc_omp_task_complete_if0");
2112     break;
2113   }
2114   case OMPRTL__kmpc_ordered: {
2115     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2116     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2117     auto *FnTy =
2118         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2119     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2120     break;
2121   }
2122   case OMPRTL__kmpc_end_ordered: {
2123     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2124     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2125     auto *FnTy =
2126         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2127     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2128     break;
2129   }
2130   case OMPRTL__kmpc_omp_taskwait: {
2131     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2132     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2133     auto *FnTy =
2134         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2135     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2136     break;
2137   }
2138   case OMPRTL__kmpc_taskgroup: {
2139     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2140     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2141     auto *FnTy =
2142         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2143     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2144     break;
2145   }
2146   case OMPRTL__kmpc_end_taskgroup: {
2147     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2148     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2149     auto *FnTy =
2150         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2151     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2152     break;
2153   }
2154   case OMPRTL__kmpc_push_proc_bind: {
2155     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2156     // int proc_bind)
2157     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2158     auto *FnTy =
2159         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2160     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2161     break;
2162   }
2163   case OMPRTL__kmpc_omp_task_with_deps: {
2164     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2165     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2166     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2167     llvm::Type *TypeParams[] = {
2168         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2169         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2170     auto *FnTy =
2171         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2172     RTLFn =
2173         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2174     break;
2175   }
2176   case OMPRTL__kmpc_omp_wait_deps: {
2177     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2178     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2179     // kmp_depend_info_t *noalias_dep_list);
2180     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2181                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2182                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2183     auto *FnTy =
2184         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2185     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2186     break;
2187   }
2188   case OMPRTL__kmpc_cancellationpoint: {
2189     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2190     // global_tid, kmp_int32 cncl_kind)
2191     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2192     auto *FnTy =
2193         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2194     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2195     break;
2196   }
2197   case OMPRTL__kmpc_cancel: {
2198     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2199     // kmp_int32 cncl_kind)
2200     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2201     auto *FnTy =
2202         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2203     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2204     break;
2205   }
2206   case OMPRTL__kmpc_push_num_teams: {
    // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_teams, kmp_int32 num_threads)
2209     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2210         CGM.Int32Ty};
2211     auto *FnTy =
2212         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2213     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2214     break;
2215   }
2216   case OMPRTL__kmpc_fork_teams: {
2217     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2218     // microtask, ...);
2219     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2220                                 getKmpc_MicroPointerTy()};
2221     auto *FnTy =
2222         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2223     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2224     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2225       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2226         llvm::LLVMContext &Ctx = F->getContext();
2227         llvm::MDBuilder MDB(Ctx);
2228         // Annotate the callback behavior of the __kmpc_fork_teams:
2229         //  - The callback callee is argument number 2 (microtask).
2230         //  - The first two arguments of the callback callee are unknown (-1).
2231         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2232         //    callback callee.
2233         F->addMetadata(
2234             llvm::LLVMContext::MD_callback,
2235             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2236                                         2, {-1, -1},
2237                                         /* VarArgsArePassed */ true)}));
2238       }
2239     }
2240     break;
2241   }
2242   case OMPRTL__kmpc_taskloop: {
2243     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2244     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2245     // sched, kmp_uint64 grainsize, void *task_dup);
2246     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2247                                 CGM.IntTy,
2248                                 CGM.VoidPtrTy,
2249                                 CGM.IntTy,
2250                                 CGM.Int64Ty->getPointerTo(),
2251                                 CGM.Int64Ty->getPointerTo(),
2252                                 CGM.Int64Ty,
2253                                 CGM.IntTy,
2254                                 CGM.IntTy,
2255                                 CGM.Int64Ty,
2256                                 CGM.VoidPtrTy};
2257     auto *FnTy =
2258         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2259     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2260     break;
2261   }
2262   case OMPRTL__kmpc_doacross_init: {
2263     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2264     // num_dims, struct kmp_dim *dims);
2265     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2266                                 CGM.Int32Ty,
2267                                 CGM.Int32Ty,
2268                                 CGM.VoidPtrTy};
2269     auto *FnTy =
2270         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2271     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2272     break;
2273   }
2274   case OMPRTL__kmpc_doacross_fini: {
2275     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2276     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2277     auto *FnTy =
2278         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2279     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2280     break;
2281   }
2282   case OMPRTL__kmpc_doacross_post: {
2283     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2284     // *vec);
2285     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2286                                 CGM.Int64Ty->getPointerTo()};
2287     auto *FnTy =
2288         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2289     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2290     break;
2291   }
2292   case OMPRTL__kmpc_doacross_wait: {
2293     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2294     // *vec);
2295     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2296                                 CGM.Int64Ty->getPointerTo()};
2297     auto *FnTy =
2298         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2299     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2300     break;
2301   }
2302   case OMPRTL__kmpc_task_reduction_init: {
2303     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2304     // *data);
2305     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2306     auto *FnTy =
2307         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2308     RTLFn =
2309         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2310     break;
2311   }
2312   case OMPRTL__kmpc_task_reduction_get_th_data: {
2313     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2314     // *d);
2315     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2316     auto *FnTy =
2317         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2318     RTLFn = CGM.CreateRuntimeFunction(
2319         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2320     break;
2321   }
2322   case OMPRTL__kmpc_alloc: {
2323     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2324     // al); omp_allocator_handle_t type is void *.
2325     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2326     auto *FnTy =
2327         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2328     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2329     break;
2330   }
2331   case OMPRTL__kmpc_free: {
2332     // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2333     // al); omp_allocator_handle_t type is void *.
2334     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2335     auto *FnTy =
2336         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2337     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2338     break;
2339   }
2340   case OMPRTL__kmpc_push_target_tripcount: {
2341     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2342     // size);
2343     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2344     llvm::FunctionType *FnTy =
2345         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2346     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2347     break;
2348   }
2349   case OMPRTL__tgt_target: {
2350     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2351     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2352     // *arg_types);
2353     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2354                                 CGM.VoidPtrTy,
2355                                 CGM.Int32Ty,
2356                                 CGM.VoidPtrPtrTy,
2357                                 CGM.VoidPtrPtrTy,
2358                                 CGM.Int64Ty->getPointerTo(),
2359                                 CGM.Int64Ty->getPointerTo()};
2360     auto *FnTy =
2361         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2362     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2363     break;
2364   }
2365   case OMPRTL__tgt_target_nowait: {
2366     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2367     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2368     // int64_t *arg_types);
2369     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2370                                 CGM.VoidPtrTy,
2371                                 CGM.Int32Ty,
2372                                 CGM.VoidPtrPtrTy,
2373                                 CGM.VoidPtrPtrTy,
2374                                 CGM.Int64Ty->getPointerTo(),
2375                                 CGM.Int64Ty->getPointerTo()};
2376     auto *FnTy =
2377         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2378     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2379     break;
2380   }
2381   case OMPRTL__tgt_target_teams: {
2382     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2383     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2384     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2385     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2386                                 CGM.VoidPtrTy,
2387                                 CGM.Int32Ty,
2388                                 CGM.VoidPtrPtrTy,
2389                                 CGM.VoidPtrPtrTy,
2390                                 CGM.Int64Ty->getPointerTo(),
2391                                 CGM.Int64Ty->getPointerTo(),
2392                                 CGM.Int32Ty,
2393                                 CGM.Int32Ty};
2394     auto *FnTy =
2395         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2396     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2397     break;
2398   }
2399   case OMPRTL__tgt_target_teams_nowait: {
2400     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2401     // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2402     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2403     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2404                                 CGM.VoidPtrTy,
2405                                 CGM.Int32Ty,
2406                                 CGM.VoidPtrPtrTy,
2407                                 CGM.VoidPtrPtrTy,
2408                                 CGM.Int64Ty->getPointerTo(),
2409                                 CGM.Int64Ty->getPointerTo(),
2410                                 CGM.Int32Ty,
2411                                 CGM.Int32Ty};
2412     auto *FnTy =
2413         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2414     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2415     break;
2416   }
2417   case OMPRTL__tgt_register_requires: {
2418     // Build void __tgt_register_requires(int64_t flags);
2419     llvm::Type *TypeParams[] = {CGM.Int64Ty};
2420     auto *FnTy =
2421         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2422     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2423     break;
2424   }
2425   case OMPRTL__tgt_register_lib: {
2426     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2427     QualType ParamTy =
2428         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2429     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2430     auto *FnTy =
2431         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2432     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2433     break;
2434   }
2435   case OMPRTL__tgt_unregister_lib: {
2436     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2437     QualType ParamTy =
2438         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2439     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2440     auto *FnTy =
2441         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2442     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2443     break;
2444   }
2445   case OMPRTL__tgt_target_data_begin: {
2446     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2447     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2448     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2449                                 CGM.Int32Ty,
2450                                 CGM.VoidPtrPtrTy,
2451                                 CGM.VoidPtrPtrTy,
2452                                 CGM.Int64Ty->getPointerTo(),
2453                                 CGM.Int64Ty->getPointerTo()};
2454     auto *FnTy =
2455         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2456     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2457     break;
2458   }
2459   case OMPRTL__tgt_target_data_begin_nowait: {
2460     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2461     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2462     // *arg_types);
2463     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2464                                 CGM.Int32Ty,
2465                                 CGM.VoidPtrPtrTy,
2466                                 CGM.VoidPtrPtrTy,
2467                                 CGM.Int64Ty->getPointerTo(),
2468                                 CGM.Int64Ty->getPointerTo()};
2469     auto *FnTy =
2470         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2471     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2472     break;
2473   }
2474   case OMPRTL__tgt_target_data_end: {
2475     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2476     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2477     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2478                                 CGM.Int32Ty,
2479                                 CGM.VoidPtrPtrTy,
2480                                 CGM.VoidPtrPtrTy,
2481                                 CGM.Int64Ty->getPointerTo(),
2482                                 CGM.Int64Ty->getPointerTo()};
2483     auto *FnTy =
2484         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2485     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2486     break;
2487   }
2488   case OMPRTL__tgt_target_data_end_nowait: {
2489     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2490     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2491     // *arg_types);
2492     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2493                                 CGM.Int32Ty,
2494                                 CGM.VoidPtrPtrTy,
2495                                 CGM.VoidPtrPtrTy,
2496                                 CGM.Int64Ty->getPointerTo(),
2497                                 CGM.Int64Ty->getPointerTo()};
2498     auto *FnTy =
2499         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2500     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2501     break;
2502   }
2503   case OMPRTL__tgt_target_data_update: {
2504     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2505     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2506     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2507                                 CGM.Int32Ty,
2508                                 CGM.VoidPtrPtrTy,
2509                                 CGM.VoidPtrPtrTy,
2510                                 CGM.Int64Ty->getPointerTo(),
2511                                 CGM.Int64Ty->getPointerTo()};
2512     auto *FnTy =
2513         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2514     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2515     break;
2516   }
2517   case OMPRTL__tgt_target_data_update_nowait: {
2518     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2519     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2520     // *arg_types);
2521     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2522                                 CGM.Int32Ty,
2523                                 CGM.VoidPtrPtrTy,
2524                                 CGM.VoidPtrPtrTy,
2525                                 CGM.Int64Ty->getPointerTo(),
2526                                 CGM.Int64Ty->getPointerTo()};
2527     auto *FnTy =
2528         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2529     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2530     break;
2531   }
2532   case OMPRTL__tgt_mapper_num_components: {
2533     // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
2534     llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
2535     auto *FnTy =
2536         llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
2537     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
2538     break;
2539   }
2540   case OMPRTL__tgt_push_mapper_component: {
2541     // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
2542     // *base, void *begin, int64_t size, int64_t type);
2543     llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
2544                                 CGM.Int64Ty, CGM.Int64Ty};
2545     auto *FnTy =
2546         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2547     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
2548     break;
2549   }
2550   }
2551   assert(RTLFn && "Unable to find OpenMP runtime function");
2552   return RTLFn;
2553 }
2554 
2555 llvm::FunctionCallee
2556 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2557   assert((IVSize == 32 || IVSize == 64) &&
2558          "IV size is not compatible with the omp runtime");
2559   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2560                                             : "__kmpc_for_static_init_4u")
2561                                 : (IVSigned ? "__kmpc_for_static_init_8"
2562                                             : "__kmpc_for_static_init_8u");
2563   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2564   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2565   llvm::Type *TypeParams[] = {
2566     getIdentTyPointerTy(),                     // loc
2567     CGM.Int32Ty,                               // tid
2568     CGM.Int32Ty,                               // schedtype
2569     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2570     PtrTy,                                     // p_lower
2571     PtrTy,                                     // p_upper
2572     PtrTy,                                     // p_stride
2573     ITy,                                       // incr
2574     ITy                                        // chunk
2575   };
2576   auto *FnTy =
2577       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2578   return CGM.CreateRuntimeFunction(FnTy, Name);
2579 }
2580 
2581 llvm::FunctionCallee
2582 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2583   assert((IVSize == 32 || IVSize == 64) &&
2584          "IV size is not compatible with the omp runtime");
2585   StringRef Name =
2586       IVSize == 32
2587           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2588           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2589   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2590   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2591                                CGM.Int32Ty,           // tid
2592                                CGM.Int32Ty,           // schedtype
2593                                ITy,                   // lower
2594                                ITy,                   // upper
2595                                ITy,                   // stride
2596                                ITy                    // chunk
2597   };
2598   auto *FnTy =
2599       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2600   return CGM.CreateRuntimeFunction(FnTy, Name);
2601 }
2602 
2603 llvm::FunctionCallee
2604 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2605   assert((IVSize == 32 || IVSize == 64) &&
2606          "IV size is not compatible with the omp runtime");
2607   StringRef Name =
2608       IVSize == 32
2609           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2610           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2611   llvm::Type *TypeParams[] = {
2612       getIdentTyPointerTy(), // loc
2613       CGM.Int32Ty,           // tid
2614   };
2615   auto *FnTy =
2616       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2617   return CGM.CreateRuntimeFunction(FnTy, Name);
2618 }
2619 
2620 llvm::FunctionCallee
2621 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2622   assert((IVSize == 32 || IVSize == 64) &&
2623          "IV size is not compatible with the omp runtime");
2624   StringRef Name =
2625       IVSize == 32
2626           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2627           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2628   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2629   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2630   llvm::Type *TypeParams[] = {
2631     getIdentTyPointerTy(),                     // loc
2632     CGM.Int32Ty,                               // tid
2633     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2634     PtrTy,                                     // p_lower
2635     PtrTy,                                     // p_upper
2636     PtrTy                                      // p_stride
2637   };
2638   auto *FnTy =
2639       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2640   return CGM.CreateRuntimeFunction(FnTy, Name);
2641 }
2642 
2643 /// Obtain information that uniquely identifies a target entry. This
2644 /// consists of the file and device IDs as well as line number associated with
2645 /// the relevant entry source location.
2646 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2647                                      unsigned &DeviceID, unsigned &FileID,
2648                                      unsigned &LineNum) {
2649   SourceManager &SM = C.getSourceManager();
2650 
2651   // The loc should be always valid and have a file ID (the user cannot use
2652   // #pragma directives in macros)
2653 
2654   assert(Loc.isValid() && "Source location is expected to be always valid.");
2655 
2656   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2657   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2658 
2659   llvm::sys::fs::UniqueID ID;
2660   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2661     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2662         << PLoc.getFilename() << EC.message();
2663 
2664   DeviceID = ID.getDevice();
2665   FileID = ID.getFile();
2666   LineNum = PLoc.getLine();
2667 }
2668 
2669 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
2670   if (CGM.getLangOpts().OpenMPSimd)
2671     return Address::invalid();
2672   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2673       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2674   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
2675               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2676                HasRequiresUnifiedSharedMemory))) {
2677     SmallString<64> PtrName;
2678     {
2679       llvm::raw_svector_ostream OS(PtrName);
2680       OS << CGM.getMangledName(GlobalDecl(VD));
2681       if (!VD->isExternallyVisible()) {
2682         unsigned DeviceID, FileID, Line;
2683         getTargetEntryUniqueInfo(CGM.getContext(),
2684                                  VD->getCanonicalDecl()->getBeginLoc(),
2685                                  DeviceID, FileID, Line);
2686         OS << llvm::format("_%x", FileID);
2687       }
2688       OS << "_decl_tgt_ref_ptr";
2689     }
2690     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2691     if (!Ptr) {
2692       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2693       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2694                                         PtrName);
2695 
2696       auto *GV = cast<llvm::GlobalVariable>(Ptr);
2697       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
2698 
2699       if (!CGM.getLangOpts().OpenMPIsDevice)
2700         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2701       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2702     }
2703     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2704   }
2705   return Address::invalid();
2706 }
2707 
2708 llvm::Constant *
2709 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2710   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2711          !CGM.getContext().getTargetInfo().isTLSSupported());
2712   // Lookup the entry, lazily creating it if necessary.
2713   std::string Suffix = getName({"cache", ""});
2714   return getOrCreateInternalVariable(
2715       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2716 }
2717 
2718 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2719                                                 const VarDecl *VD,
2720                                                 Address VDAddr,
2721                                                 SourceLocation Loc) {
2722   if (CGM.getLangOpts().OpenMPUseTLS &&
2723       CGM.getContext().getTargetInfo().isTLSSupported())
2724     return VDAddr;
2725 
2726   llvm::Type *VarTy = VDAddr.getElementType();
2727   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2728                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2729                                                        CGM.Int8PtrTy),
2730                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2731                          getOrCreateThreadPrivateCache(VD)};
2732   return Address(CGF.EmitRuntimeCall(
2733       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2734                  VDAddr.getAlignment());
2735 }
2736 
2737 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2738     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2739     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2740   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2741   // library.
2742   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2743   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2744                       OMPLoc);
2745   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2746   // to register constructor/destructor for variable.
2747   llvm::Value *Args[] = {
2748       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2749       Ctor, CopyCtor, Dtor};
2750   CGF.EmitRuntimeCall(
2751       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2752 }
2753 
2754 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
2755     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2756     bool PerformInit, CodeGenFunction *CGF) {
2757   if (CGM.getLangOpts().OpenMPUseTLS &&
2758       CGM.getContext().getTargetInfo().isTLSSupported())
2759     return nullptr;
2760 
2761   VD = VD->getDefinition(CGM.getContext());
2762   if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
2763     QualType ASTTy = VD->getType();
2764 
2765     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2766     const Expr *Init = VD->getAnyInitializer();
2767     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2768       // Generate function that re-emits the declaration's initializer into the
2769       // threadprivate copy of the variable VD
2770       CodeGenFunction CtorCGF(CGM);
2771       FunctionArgList Args;
2772       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2773                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2774                             ImplicitParamDecl::Other);
2775       Args.push_back(&Dst);
2776 
2777       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2778           CGM.getContext().VoidPtrTy, Args);
2779       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2780       std::string Name = getName({"__kmpc_global_ctor_", ""});
2781       llvm::Function *Fn =
2782           CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2783       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2784                             Args, Loc, Loc);
2785       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
2786           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2787           CGM.getContext().VoidPtrTy, Dst.getLocation());
2788       Address Arg = Address(ArgVal, VDAddr.getAlignment());
2789       Arg = CtorCGF.Builder.CreateElementBitCast(
2790           Arg, CtorCGF.ConvertTypeForMem(ASTTy));
2791       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2792                                /*IsInitializer=*/true);
2793       ArgVal = CtorCGF.EmitLoadOfScalar(
2794           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2795           CGM.getContext().VoidPtrTy, Dst.getLocation());
2796       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2797       CtorCGF.FinishFunction();
2798       Ctor = Fn;
2799     }
2800     if (VD->getType().isDestructedType() != QualType::DK_none) {
2801       // Generate function that emits destructor call for the threadprivate copy
2802       // of the variable VD
2803       CodeGenFunction DtorCGF(CGM);
2804       FunctionArgList Args;
2805       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2806                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2807                             ImplicitParamDecl::Other);
2808       Args.push_back(&Dst);
2809 
2810       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2811           CGM.getContext().VoidTy, Args);
2812       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2813       std::string Name = getName({"__kmpc_global_dtor_", ""});
2814       llvm::Function *Fn =
2815           CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2816       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2817       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2818                             Loc, Loc);
2819       // Create a scope with an artificial location for the body of this function.
2820       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2821       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
2822           DtorCGF.GetAddrOfLocalVar(&Dst),
2823           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2824       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2825                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2826                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2827       DtorCGF.FinishFunction();
2828       Dtor = Fn;
2829     }
2830     // Do not emit init function if it is not required.
2831     if (!Ctor && !Dtor)
2832       return nullptr;
2833 
2834     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2835     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2836                                                /*isVarArg=*/false)
2837                            ->getPointerTo();
2838     // Copying constructor for the threadprivate variable.
2839     // Must be NULL - reserved by runtime, but currently it requires that this
2840     // parameter is always NULL. Otherwise it fires assertion.
2841     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2842     if (Ctor == nullptr) {
2843       auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2844                                              /*isVarArg=*/false)
2845                          ->getPointerTo();
2846       Ctor = llvm::Constant::getNullValue(CtorTy);
2847     }
2848     if (Dtor == nullptr) {
2849       auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2850                                              /*isVarArg=*/false)
2851                          ->getPointerTo();
2852       Dtor = llvm::Constant::getNullValue(DtorTy);
2853     }
2854     if (!CGF) {
2855       auto *InitFunctionTy =
2856           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2857       std::string Name = getName({"__omp_threadprivate_init_", ""});
2858       llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2859           InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
2860       CodeGenFunction InitCGF(CGM);
2861       FunctionArgList ArgList;
2862       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2863                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
2864                             Loc, Loc);
2865       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2866       InitCGF.FinishFunction();
2867       return InitFunction;
2868     }
2869     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2870   }
2871   return nullptr;
2872 }
2873 
2874 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2875                                                      llvm::GlobalVariable *Addr,
2876                                                      bool PerformInit) {
2877   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
2878       !CGM.getLangOpts().OpenMPIsDevice)
2879     return false;
2880   Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2881       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2882   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
2883       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2884        HasRequiresUnifiedSharedMemory))
2885     return CGM.getLangOpts().OpenMPIsDevice;
2886   VD = VD->getDefinition(CGM.getContext());
2887   if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
2888     return CGM.getLangOpts().OpenMPIsDevice;
2889 
2890   QualType ASTTy = VD->getType();
2891 
2892   SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
2893   // Produce the unique prefix to identify the new target regions. We use
2894   // the source location of the variable declaration which we know to not
2895   // conflict with any target region.
2896   unsigned DeviceID;
2897   unsigned FileID;
2898   unsigned Line;
2899   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2900   SmallString<128> Buffer, Out;
2901   {
2902     llvm::raw_svector_ostream OS(Buffer);
2903     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2904        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2905   }
2906 
2907   const Expr *Init = VD->getAnyInitializer();
2908   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2909     llvm::Constant *Ctor;
2910     llvm::Constant *ID;
2911     if (CGM.getLangOpts().OpenMPIsDevice) {
2912       // Generate function that re-emits the declaration's initializer into
2913       // the threadprivate copy of the variable VD
2914       CodeGenFunction CtorCGF(CGM);
2915 
2916       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2917       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2918       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2919           FTy, Twine(Buffer, "_ctor"), FI, Loc);
2920       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2921       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2922                             FunctionArgList(), Loc, Loc);
2923       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2924       CtorCGF.EmitAnyExprToMem(Init,
2925                                Address(Addr, CGM.getContext().getDeclAlign(VD)),
2926                                Init->getType().getQualifiers(),
2927                                /*IsInitializer=*/true);
2928       CtorCGF.FinishFunction();
2929       Ctor = Fn;
2930       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2931       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
2932     } else {
2933       Ctor = new llvm::GlobalVariable(
2934           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2935           llvm::GlobalValue::PrivateLinkage,
2936           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2937       ID = Ctor;
2938     }
2939 
2940     // Register the information for the entry associated with the constructor.
2941     Out.clear();
2942     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2943         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2944         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
2945   }
2946   if (VD->getType().isDestructedType() != QualType::DK_none) {
2947     llvm::Constant *Dtor;
2948     llvm::Constant *ID;
2949     if (CGM.getLangOpts().OpenMPIsDevice) {
2950       // Generate function that emits destructor call for the threadprivate
2951       // copy of the variable VD
2952       CodeGenFunction DtorCGF(CGM);
2953 
2954       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2955       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2956       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2957           FTy, Twine(Buffer, "_dtor"), FI, Loc);
2958       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2959       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2960                             FunctionArgList(), Loc, Loc);
2961       // Create a scope with an artificial location for the body of this
2962       // function.
2963       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2964       DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
2965                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2966                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2967       DtorCGF.FinishFunction();
2968       Dtor = Fn;
2969       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2970       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
2971     } else {
2972       Dtor = new llvm::GlobalVariable(
2973           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2974           llvm::GlobalValue::PrivateLinkage,
2975           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
2976       ID = Dtor;
2977     }
2978     // Register the information for the entry associated with the destructor.
2979     Out.clear();
2980     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2981         DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
2982         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
2983   }
2984   return CGM.getLangOpts().OpenMPIsDevice;
2985 }
2986 
2987 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2988                                                           QualType VarType,
2989                                                           StringRef Name) {
2990   std::string Suffix = getName({"artificial", ""});
2991   std::string CacheSuffix = getName({"cache", ""});
2992   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2993   llvm::Value *GAddr =
2994       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2995   llvm::Value *Args[] = {
2996       emitUpdateLocation(CGF, SourceLocation()),
2997       getThreadID(CGF, SourceLocation()),
2998       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2999       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
3000                                 /*isSigned=*/false),
3001       getOrCreateInternalVariable(
3002           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
3003   return Address(
3004       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3005           CGF.EmitRuntimeCall(
3006               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
3007           VarLVType->getPointerTo(/*AddrSpace=*/0)),
3008       CGM.getPointerAlign());
3009 }
3010 
3011 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
3012                                       const RegionCodeGenTy &ThenGen,
3013                                       const RegionCodeGenTy &ElseGen) {
3014   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
3015 
3016   // If the condition constant folds and can be elided, try to avoid emitting
3017   // the condition and the dead arm of the if/else.
3018   bool CondConstant;
3019   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
3020     if (CondConstant)
3021       ThenGen(CGF);
3022     else
3023       ElseGen(CGF);
3024     return;
3025   }
3026 
3027   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
3028   // emit the conditional branch.
3029   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
3030   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
3031   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
3032   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
3033 
3034   // Emit the 'then' code.
3035   CGF.EmitBlock(ThenBlock);
3036   ThenGen(CGF);
3037   CGF.EmitBranch(ContBlock);
3038   // Emit the 'else' code if present.
3039   // There is no need to emit line number for unconditional branch.
3040   (void)ApplyDebugLocation::CreateEmpty(CGF);
3041   CGF.EmitBlock(ElseBlock);
3042   ElseGen(CGF);
3043   // There is no need to emit line number for unconditional branch.
3044   (void)ApplyDebugLocation::CreateEmpty(CGF);
3045   CGF.EmitBranch(ContBlock);
3046   // Emit the continuation block for code after the if.
3047   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
3048 }
3049 
3050 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
3051                                        llvm::Function *OutlinedFn,
3052                                        ArrayRef<llvm::Value *> CapturedVars,
3053                                        const Expr *IfCond) {
3054   if (!CGF.HaveInsertPoint())
3055     return;
3056   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
3057   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
3058                                                      PrePostActionTy &) {
3059     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
3060     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
3061     llvm::Value *Args[] = {
3062         RTLoc,
3063         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
3064         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
3065     llvm::SmallVector<llvm::Value *, 16> RealArgs;
3066     RealArgs.append(std::begin(Args), std::end(Args));
3067     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
3068 
3069     llvm::FunctionCallee RTLFn =
3070         RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
3071     CGF.EmitRuntimeCall(RTLFn, RealArgs);
3072   };
3073   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
3074                                                           PrePostActionTy &) {
3075     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
3076     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
3077     // Build calls:
3078     // __kmpc_serialized_parallel(&Loc, GTid);
3079     llvm::Value *Args[] = {RTLoc, ThreadID};
3080     CGF.EmitRuntimeCall(
3081         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
3082 
3083     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
3084     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
3085     Address ZeroAddrBound =
3086         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
3087                                          /*Name=*/".bound.zero.addr");
3088     CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
3089     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
3090     // ThreadId for serialized parallels is 0.
3091     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
3092     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
3093     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
3094     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
3095 
3096     // __kmpc_end_serialized_parallel(&Loc, GTid);
3097     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
3098     CGF.EmitRuntimeCall(
3099         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
3100         EndArgs);
3101   };
3102   if (IfCond) {
3103     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
3104   } else {
3105     RegionCodeGenTy ThenRCG(ThenGen);
3106     ThenRCG(CGF);
3107   }
3108 }
3109 
3110 // If we're inside an (outlined) parallel region, use the region info's
3111 // thread-ID variable (it is passed in a first argument of the outlined function
3112 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3113 // regular serial code region, get thread ID by calling kmp_int32
3114 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3115 // return the address of that temp.
3116 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3117                                              SourceLocation Loc) {
3118   if (auto *OMPRegionInfo =
3119           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3120     if (OMPRegionInfo->getThreadIDVariable())
3121       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
3122 
3123   llvm::Value *ThreadID = getThreadID(CGF, Loc);
3124   QualType Int32Ty =
3125       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3126   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3127   CGF.EmitStoreOfScalar(ThreadID,
3128                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3129 
3130   return ThreadIDTemp;
3131 }
3132 
3133 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3134     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3135   SmallString<256> Buffer;
3136   llvm::raw_svector_ostream Out(Buffer);
3137   Out << Name;
3138   StringRef RuntimeName = Out.str();
3139   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3140   if (Elem.second) {
3141     assert(Elem.second->getType()->getPointerElementType() == Ty &&
3142            "OMP internal variable has different type than requested");
3143     return &*Elem.second;
3144   }
3145 
3146   return Elem.second = new llvm::GlobalVariable(
3147              CGM.getModule(), Ty, /*IsConstant*/ false,
3148              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3149              Elem.first(), /*InsertBefore=*/nullptr,
3150              llvm::GlobalValue::NotThreadLocal, AddressSpace);
3151 }
3152 
3153 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3154   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3155   std::string Name = getName({Prefix, "var"});
3156   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3157 }
3158 
3159 namespace {
3160 /// Common pre(post)-action for different OpenMP constructs.
3161 class CommonActionTy final : public PrePostActionTy {
3162   llvm::FunctionCallee EnterCallee;
3163   ArrayRef<llvm::Value *> EnterArgs;
3164   llvm::FunctionCallee ExitCallee;
3165   ArrayRef<llvm::Value *> ExitArgs;
3166   bool Conditional;
3167   llvm::BasicBlock *ContBlock = nullptr;
3168 
3169 public:
3170   CommonActionTy(llvm::FunctionCallee EnterCallee,
3171                  ArrayRef<llvm::Value *> EnterArgs,
3172                  llvm::FunctionCallee ExitCallee,
3173                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3174       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3175         ExitArgs(ExitArgs), Conditional(Conditional) {}
3176   void Enter(CodeGenFunction &CGF) override {
3177     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3178     if (Conditional) {
3179       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3180       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3181       ContBlock = CGF.createBasicBlock("omp_if.end");
3182       // Generate the branch (If-stmt)
3183       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3184       CGF.EmitBlock(ThenBlock);
3185     }
3186   }
3187   void Done(CodeGenFunction &CGF) {
3188     // Emit the rest of blocks/branches
3189     CGF.EmitBranch(ContBlock);
3190     CGF.EmitBlock(ContBlock, true);
3191   }
3192   void Exit(CodeGenFunction &CGF) override {
3193     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3194   }
3195 };
3196 } // anonymous namespace
3197 
3198 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3199                                          StringRef CriticalName,
3200                                          const RegionCodeGenTy &CriticalOpGen,
3201                                          SourceLocation Loc, const Expr *Hint) {
3202   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3203   // CriticalOpGen();
3204   // __kmpc_end_critical(ident_t *, gtid, Lock);
3205   // Prepare arguments and build a call to __kmpc_critical
3206   if (!CGF.HaveInsertPoint())
3207     return;
3208   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3209                          getCriticalRegionLock(CriticalName)};
3210   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3211                                                 std::end(Args));
3212   if (Hint) {
3213     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3214         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3215   }
3216   CommonActionTy Action(
3217       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3218                                  : OMPRTL__kmpc_critical),
3219       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3220   CriticalOpGen.setAction(Action);
3221   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3222 }
3223 
3224 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3225                                        const RegionCodeGenTy &MasterOpGen,
3226                                        SourceLocation Loc) {
3227   if (!CGF.HaveInsertPoint())
3228     return;
3229   // if(__kmpc_master(ident_t *, gtid)) {
3230   //   MasterOpGen();
3231   //   __kmpc_end_master(ident_t *, gtid);
3232   // }
3233   // Prepare arguments and build a call to __kmpc_master
3234   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3235   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3236                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3237                         /*Conditional=*/true);
3238   MasterOpGen.setAction(Action);
3239   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3240   Action.Done(CGF);
3241 }
3242 
3243 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3244                                         SourceLocation Loc) {
3245   if (!CGF.HaveInsertPoint())
3246     return;
3247   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3248   llvm::Value *Args[] = {
3249       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3250       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3251   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3252   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3253     Region->emitUntiedSwitch(CGF);
3254 }
3255 
3256 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3257                                           const RegionCodeGenTy &TaskgroupOpGen,
3258                                           SourceLocation Loc) {
3259   if (!CGF.HaveInsertPoint())
3260     return;
3261   // __kmpc_taskgroup(ident_t *, gtid);
3262   // TaskgroupOpGen();
3263   // __kmpc_end_taskgroup(ident_t *, gtid);
3264   // Prepare arguments and build a call to __kmpc_taskgroup
3265   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3266   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3267                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3268                         Args);
3269   TaskgroupOpGen.setAction(Action);
3270   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3271 }
3272 
3273 /// Given an array of pointers to variables, project the address of a
3274 /// given variable.
3275 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3276                                       unsigned Index, const VarDecl *Var) {
3277   // Pull out the pointer to the variable.
3278   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3279   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3280 
3281   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3282   Addr = CGF.Builder.CreateElementBitCast(
3283       Addr, CGF.ConvertTypeForMem(Var->getType()));
3284   return Addr;
3285 }
3286 
3287 static llvm::Value *emitCopyprivateCopyFunction(
3288     CodeGenModule &CGM, llvm::Type *ArgsType,
3289     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3290     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3291     SourceLocation Loc) {
3292   ASTContext &C = CGM.getContext();
3293   // void copy_func(void *LHSArg, void *RHSArg);
3294   FunctionArgList Args;
3295   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3296                            ImplicitParamDecl::Other);
3297   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3298                            ImplicitParamDecl::Other);
3299   Args.push_back(&LHSArg);
3300   Args.push_back(&RHSArg);
3301   const auto &CGFI =
3302       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3303   std::string Name =
3304       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3305   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3306                                     llvm::GlobalValue::InternalLinkage, Name,
3307                                     &CGM.getModule());
3308   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3309   Fn->setDoesNotRecurse();
3310   CodeGenFunction CGF(CGM);
3311   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3312   // Dest = (void*[n])(LHSArg);
3313   // Src = (void*[n])(RHSArg);
3314   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3315       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3316       ArgsType), CGF.getPointerAlign());
3317   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3318       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3319       ArgsType), CGF.getPointerAlign());
3320   // *(Type0*)Dst[0] = *(Type0*)Src[0];
3321   // *(Type1*)Dst[1] = *(Type1*)Src[1];
3322   // ...
3323   // *(Typen*)Dst[n] = *(Typen*)Src[n];
3324   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3325     const auto *DestVar =
3326         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3327     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3328 
3329     const auto *SrcVar =
3330         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3331     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3332 
3333     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3334     QualType Type = VD->getType();
3335     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3336   }
3337   CGF.FinishFunction();
3338   return Fn;
3339 }
3340 
3341 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
3342                                        const RegionCodeGenTy &SingleOpGen,
3343                                        SourceLocation Loc,
3344                                        ArrayRef<const Expr *> CopyprivateVars,
3345                                        ArrayRef<const Expr *> SrcExprs,
3346                                        ArrayRef<const Expr *> DstExprs,
3347                                        ArrayRef<const Expr *> AssignmentOps) {
3348   if (!CGF.HaveInsertPoint())
3349     return;
3350   assert(CopyprivateVars.size() == SrcExprs.size() &&
3351          CopyprivateVars.size() == DstExprs.size() &&
3352          CopyprivateVars.size() == AssignmentOps.size());
3353   ASTContext &C = CGM.getContext();
3354   // int32 did_it = 0;
3355   // if(__kmpc_single(ident_t *, gtid)) {
3356   //   SingleOpGen();
3357   //   __kmpc_end_single(ident_t *, gtid);
3358   //   did_it = 1;
3359   // }
3360   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3361   // <copy_func>, did_it);
3362 
3363   Address DidIt = Address::invalid();
3364   if (!CopyprivateVars.empty()) {
3365     // int32 did_it = 0;
3366     QualType KmpInt32Ty =
3367         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3368     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3369     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3370   }
3371   // Prepare arguments and build a call to __kmpc_single
3372   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3373   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3374                         createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
3375                         /*Conditional=*/true);
3376   SingleOpGen.setAction(Action);
3377   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
3378   if (DidIt.isValid()) {
3379     // did_it = 1;
3380     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3381   }
3382   Action.Done(CGF);
3383   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3384   // <copy_func>, did_it);
3385   if (DidIt.isValid()) {
3386     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3387     QualType CopyprivateArrayTy = C.getConstantArrayType(
3388         C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
3389         /*IndexTypeQuals=*/0);
3390     // Create a list of all private variables for copyprivate.
3391     Address CopyprivateList =
3392         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3393     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
3394       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
3395       CGF.Builder.CreateStore(
3396           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3397               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
3398           Elem);
3399     }
3400     // Build function that copies private values from single region to all other
3401     // threads in the corresponding parallel region.
3402     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
3403         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3404         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3405     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3406     Address CL =
3407       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3408                                                       CGF.VoidPtrTy);
3409     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
3410     llvm::Value *Args[] = {
3411         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3412         getThreadID(CGF, Loc),        // i32 <gtid>
3413         BufSize,                      // size_t <buf_size>
3414         CL.getPointer(),              // void *<copyprivate list>
3415         CpyFn,                        // void (*) (void *, void *) <copy_func>
3416         DidItVal                      // i32 did_it
3417     };
3418     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
3419   }
3420 }
3421 
3422 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3423                                         const RegionCodeGenTy &OrderedOpGen,
3424                                         SourceLocation Loc, bool IsThreads) {
3425   if (!CGF.HaveInsertPoint())
3426     return;
3427   // __kmpc_ordered(ident_t *, gtid);
3428   // OrderedOpGen();
3429   // __kmpc_end_ordered(ident_t *, gtid);
3430   // Prepare arguments and build a call to __kmpc_ordered
3431   if (IsThreads) {
3432     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3433     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3434                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3435                           Args);
3436     OrderedOpGen.setAction(Action);
3437     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3438     return;
3439   }
3440   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3441 }
3442 
3443 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3444   unsigned Flags;
3445   if (Kind == OMPD_for)
3446     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3447   else if (Kind == OMPD_sections)
3448     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3449   else if (Kind == OMPD_single)
3450     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3451   else if (Kind == OMPD_barrier)
3452     Flags = OMP_IDENT_BARRIER_EXPL;
3453   else
3454     Flags = OMP_IDENT_BARRIER_IMPL;
3455   return Flags;
3456 }
3457 
3458 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3459     CodeGenFunction &CGF, const OMPLoopDirective &S,
3460     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3461   // Check if the loop directive is actually a doacross loop directive. In this
3462   // case choose static, 1 schedule.
3463   if (llvm::any_of(
3464           S.getClausesOfKind<OMPOrderedClause>(),
3465           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3466     ScheduleKind = OMPC_SCHEDULE_static;
3467     // Chunk size is 1 in this case.
3468     llvm::APInt ChunkSize(32, 1);
3469     ChunkExpr = IntegerLiteral::Create(
3470         CGF.getContext(), ChunkSize,
3471         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3472         SourceLocation());
3473   }
3474 }
3475 
3476 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3477                                       OpenMPDirectiveKind Kind, bool EmitChecks,
3478                                       bool ForceSimpleCall) {
3479   if (!CGF.HaveInsertPoint())
3480     return;
3481   // Build call __kmpc_cancel_barrier(loc, thread_id);
3482   // Build call __kmpc_barrier(loc, thread_id);
3483   unsigned Flags = getDefaultFlagsForBarriers(Kind);
3484   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3485   // thread_id);
3486   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3487                          getThreadID(CGF, Loc)};
3488   if (auto *OMPRegionInfo =
3489           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3490     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3491       llvm::Value *Result = CGF.EmitRuntimeCall(
3492           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3493       if (EmitChecks) {
3494         // if (__kmpc_cancel_barrier()) {
3495         //   exit from construct;
3496         // }
3497         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3498         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3499         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3500         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3501         CGF.EmitBlock(ExitBB);
3502         //   exit from construct;
3503         CodeGenFunction::JumpDest CancelDestination =
3504             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3505         CGF.EmitBranchThroughCleanup(CancelDestination);
3506         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3507       }
3508       return;
3509     }
3510   }
3511   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3512 }
3513 
3514 /// Map the OpenMP loop schedule to the runtime enumeration.
3515 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3516                                           bool Chunked, bool Ordered) {
3517   switch (ScheduleKind) {
3518   case OMPC_SCHEDULE_static:
3519     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3520                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3521   case OMPC_SCHEDULE_dynamic:
3522     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3523   case OMPC_SCHEDULE_guided:
3524     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3525   case OMPC_SCHEDULE_runtime:
3526     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3527   case OMPC_SCHEDULE_auto:
3528     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3529   case OMPC_SCHEDULE_unknown:
3530     assert(!Chunked && "chunk was specified but schedule kind not known");
3531     return Ordered ? OMP_ord_static : OMP_sch_static;
3532   }
3533   llvm_unreachable("Unexpected runtime schedule");
3534 }
3535 
3536 /// Map the OpenMP distribute schedule to the runtime enumeration.
3537 static OpenMPSchedType
3538 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3539   // only static is allowed for dist_schedule
3540   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3541 }
3542 
3543 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3544                                          bool Chunked) const {
3545   OpenMPSchedType Schedule =
3546       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3547   return Schedule == OMP_sch_static;
3548 }
3549 
3550 bool CGOpenMPRuntime::isStaticNonchunked(
3551     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3552   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3553   return Schedule == OMP_dist_sch_static;
3554 }
3555 
3556 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3557                                       bool Chunked) const {
3558   OpenMPSchedType Schedule =
3559       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3560   return Schedule == OMP_sch_static_chunked;
3561 }
3562 
3563 bool CGOpenMPRuntime::isStaticChunked(
3564     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3565   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3566   return Schedule == OMP_dist_sch_static_chunked;
3567 }
3568 
3569 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3570   OpenMPSchedType Schedule =
3571       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3572   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3573   return Schedule != OMP_sch_static;
3574 }
3575 
3576 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
3577                                   OpenMPScheduleClauseModifier M1,
3578                                   OpenMPScheduleClauseModifier M2) {
3579   int Modifier = 0;
3580   switch (M1) {
3581   case OMPC_SCHEDULE_MODIFIER_monotonic:
3582     Modifier = OMP_sch_modifier_monotonic;
3583     break;
3584   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3585     Modifier = OMP_sch_modifier_nonmonotonic;
3586     break;
3587   case OMPC_SCHEDULE_MODIFIER_simd:
3588     if (Schedule == OMP_sch_static_chunked)
3589       Schedule = OMP_sch_static_balanced_chunked;
3590     break;
3591   case OMPC_SCHEDULE_MODIFIER_last:
3592   case OMPC_SCHEDULE_MODIFIER_unknown:
3593     break;
3594   }
3595   switch (M2) {
3596   case OMPC_SCHEDULE_MODIFIER_monotonic:
3597     Modifier = OMP_sch_modifier_monotonic;
3598     break;
3599   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3600     Modifier = OMP_sch_modifier_nonmonotonic;
3601     break;
3602   case OMPC_SCHEDULE_MODIFIER_simd:
3603     if (Schedule == OMP_sch_static_chunked)
3604       Schedule = OMP_sch_static_balanced_chunked;
3605     break;
3606   case OMPC_SCHEDULE_MODIFIER_last:
3607   case OMPC_SCHEDULE_MODIFIER_unknown:
3608     break;
3609   }
3610   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
3611   // If the static schedule kind is specified or if the ordered clause is
3612   // specified, and if the nonmonotonic modifier is not specified, the effect is
3613   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
3614   // modifier is specified, the effect is as if the nonmonotonic modifier is
3615   // specified.
3616   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
3617     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
3618           Schedule == OMP_sch_static_balanced_chunked ||
3619           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static))
3620       Modifier = OMP_sch_modifier_nonmonotonic;
3621   }
3622   return Schedule | Modifier;
3623 }
3624 
3625 void CGOpenMPRuntime::emitForDispatchInit(
3626     CodeGenFunction &CGF, SourceLocation Loc,
3627     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3628     bool Ordered, const DispatchRTInput &DispatchValues) {
3629   if (!CGF.HaveInsertPoint())
3630     return;
3631   OpenMPSchedType Schedule = getRuntimeSchedule(
3632       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3633   assert(Ordered ||
3634          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3635           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3636           Schedule != OMP_sch_static_balanced_chunked));
3637   // Call __kmpc_dispatch_init(
3638   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3639   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
3640   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
3641 
3642   // If the Chunk was not specified in the clause - use default value 1.
3643   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3644                                             : CGF.Builder.getIntN(IVSize, 1);
3645   llvm::Value *Args[] = {
3646       emitUpdateLocation(CGF, Loc),
3647       getThreadID(CGF, Loc),
3648       CGF.Builder.getInt32(addMonoNonMonoModifier(
3649           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3650       DispatchValues.LB,                                     // Lower
3651       DispatchValues.UB,                                     // Upper
3652       CGF.Builder.getIntN(IVSize, 1),                        // Stride
3653       Chunk                                                  // Chunk
3654   };
3655   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3656 }
3657 
3658 static void emitForStaticInitCall(
3659     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3660     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
3661     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3662     const CGOpenMPRuntime::StaticRTInput &Values) {
3663   if (!CGF.HaveInsertPoint())
3664     return;
3665 
3666   assert(!Values.Ordered);
3667   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3668          Schedule == OMP_sch_static_balanced_chunked ||
3669          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3670          Schedule == OMP_dist_sch_static ||
3671          Schedule == OMP_dist_sch_static_chunked);
3672 
3673   // Call __kmpc_for_static_init(
3674   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3675   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3676   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3677   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
3678   llvm::Value *Chunk = Values.Chunk;
3679   if (Chunk == nullptr) {
3680     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3681             Schedule == OMP_dist_sch_static) &&
3682            "expected static non-chunked schedule");
3683     // If the Chunk was not specified in the clause - use default value 1.
3684     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3685   } else {
3686     assert((Schedule == OMP_sch_static_chunked ||
3687             Schedule == OMP_sch_static_balanced_chunked ||
3688             Schedule == OMP_ord_static_chunked ||
3689             Schedule == OMP_dist_sch_static_chunked) &&
3690            "expected static chunked schedule");
3691   }
3692   llvm::Value *Args[] = {
3693       UpdateLocation,
3694       ThreadId,
3695       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
3696                                                   M2)), // Schedule type
3697       Values.IL.getPointer(),                           // &isLastIter
3698       Values.LB.getPointer(),                           // &LB
3699       Values.UB.getPointer(),                           // &UB
3700       Values.ST.getPointer(),                           // &Stride
3701       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
3702       Chunk                                             // Chunk
3703   };
3704   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3705 }
3706 
3707 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3708                                         SourceLocation Loc,
3709                                         OpenMPDirectiveKind DKind,
3710                                         const OpenMPScheduleTy &ScheduleKind,
3711                                         const StaticRTInput &Values) {
3712   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3713       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3714   assert(isOpenMPWorksharingDirective(DKind) &&
3715          "Expected loop-based or sections-based directive.");
3716   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3717                                              isOpenMPLoopDirective(DKind)
3718                                                  ? OMP_IDENT_WORK_LOOP
3719                                                  : OMP_IDENT_WORK_SECTIONS);
3720   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3721   llvm::FunctionCallee StaticInitFunction =
3722       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3723   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3724                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3725 }
3726 
3727 void CGOpenMPRuntime::emitDistributeStaticInit(
3728     CodeGenFunction &CGF, SourceLocation Loc,
3729     OpenMPDistScheduleClauseKind SchedKind,
3730     const CGOpenMPRuntime::StaticRTInput &Values) {
3731   OpenMPSchedType ScheduleNum =
3732       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3733   llvm::Value *UpdatedLocation =
3734       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3735   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3736   llvm::FunctionCallee StaticInitFunction =
3737       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3738   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3739                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3740                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3741 }
3742 
3743 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3744                                           SourceLocation Loc,
3745                                           OpenMPDirectiveKind DKind) {
3746   if (!CGF.HaveInsertPoint())
3747     return;
3748   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3749   llvm::Value *Args[] = {
3750       emitUpdateLocation(CGF, Loc,
3751                          isOpenMPDistributeDirective(DKind)
3752                              ? OMP_IDENT_WORK_DISTRIBUTE
3753                              : isOpenMPLoopDirective(DKind)
3754                                    ? OMP_IDENT_WORK_LOOP
3755                                    : OMP_IDENT_WORK_SECTIONS),
3756       getThreadID(CGF, Loc)};
3757   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3758                       Args);
3759 }
3760 
3761 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3762                                                  SourceLocation Loc,
3763                                                  unsigned IVSize,
3764                                                  bool IVSigned) {
3765   if (!CGF.HaveInsertPoint())
3766     return;
3767   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3768   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3769   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3770 }
3771 
3772 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3773                                           SourceLocation Loc, unsigned IVSize,
3774                                           bool IVSigned, Address IL,
3775                                           Address LB, Address UB,
3776                                           Address ST) {
3777   // Call __kmpc_dispatch_next(
3778   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3779   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3780   //          kmp_int[32|64] *p_stride);
3781   llvm::Value *Args[] = {
3782       emitUpdateLocation(CGF, Loc),
3783       getThreadID(CGF, Loc),
3784       IL.getPointer(), // &isLastIter
3785       LB.getPointer(), // &Lower
3786       UB.getPointer(), // &Upper
3787       ST.getPointer()  // &Stride
3788   };
3789   llvm::Value *Call =
3790       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3791   return CGF.EmitScalarConversion(
3792       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3793       CGF.getContext().BoolTy, Loc);
3794 }
3795 
3796 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3797                                            llvm::Value *NumThreads,
3798                                            SourceLocation Loc) {
3799   if (!CGF.HaveInsertPoint())
3800     return;
3801   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3802   llvm::Value *Args[] = {
3803       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3804       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3805   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3806                       Args);
3807 }
3808 
3809 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3810                                          OpenMPProcBindClauseKind ProcBind,
3811                                          SourceLocation Loc) {
3812   if (!CGF.HaveInsertPoint())
3813     return;
3814   // Constants for proc bind value accepted by the runtime.
3815   enum ProcBindTy {
3816     ProcBindFalse = 0,
3817     ProcBindTrue,
3818     ProcBindMaster,
3819     ProcBindClose,
3820     ProcBindSpread,
3821     ProcBindIntel,
3822     ProcBindDefault
3823   } RuntimeProcBind;
3824   switch (ProcBind) {
3825   case OMPC_PROC_BIND_master:
3826     RuntimeProcBind = ProcBindMaster;
3827     break;
3828   case OMPC_PROC_BIND_close:
3829     RuntimeProcBind = ProcBindClose;
3830     break;
3831   case OMPC_PROC_BIND_spread:
3832     RuntimeProcBind = ProcBindSpread;
3833     break;
3834   case OMPC_PROC_BIND_unknown:
3835     llvm_unreachable("Unsupported proc_bind value.");
3836   }
3837   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3838   llvm::Value *Args[] = {
3839       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3840       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3841   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3842 }
3843 
3844 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3845                                 SourceLocation Loc) {
3846   if (!CGF.HaveInsertPoint())
3847     return;
3848   // Build call void __kmpc_flush(ident_t *loc)
3849   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3850                       emitUpdateLocation(CGF, Loc));
3851 }
3852 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// None of the enumerators has an explicit value, so each one's numeric value
/// is its position in this list (starting at 0). Inserting or reordering
/// enumerators therefore changes every index that follows.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  /// (Generically named slot; the description above is its use as stated by
  /// the original comment.)
  Data1,
  /// Task priority.
  /// (Generically named slot; the description above is its use as stated by
  /// the original comment.)
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3878 
3879 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3880   return OffloadEntriesTargetRegion.empty() &&
3881          OffloadEntriesDeviceGlobalVar.empty();
3882 }
3883 
3884 /// Initialize target region entry.
3885 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3886     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3887                                     StringRef ParentName, unsigned LineNum,
3888                                     unsigned Order) {
3889   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3890                                              "only required for the device "
3891                                              "code generation.");
3892   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3893       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3894                                    OMPTargetRegionEntryTargetRegion);
3895   ++OffloadingEntriesNum;
3896 }
3897 
3898 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3899     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3900                                   StringRef ParentName, unsigned LineNum,
3901                                   llvm::Constant *Addr, llvm::Constant *ID,
3902                                   OMPTargetRegionEntryKind Flags) {
3903   // If we are emitting code for a target, the entry is already initialized,
3904   // only has to be registered.
3905   if (CGM.getLangOpts().OpenMPIsDevice) {
3906     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3907       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3908           DiagnosticsEngine::Error,
3909           "Unable to find target region on line '%0' in the device code.");
3910       CGM.getDiags().Report(DiagID) << LineNum;
3911       return;
3912     }
3913     auto &Entry =
3914         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3915     assert(Entry.isValid() && "Entry not initialized!");
3916     Entry.setAddress(Addr);
3917     Entry.setID(ID);
3918     Entry.setFlags(Flags);
3919   } else {
3920     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3921     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3922     ++OffloadingEntriesNum;
3923   }
3924 }
3925 
3926 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3927     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3928     unsigned LineNum) const {
3929   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3930   if (PerDevice == OffloadEntriesTargetRegion.end())
3931     return false;
3932   auto PerFile = PerDevice->second.find(FileID);
3933   if (PerFile == PerDevice->second.end())
3934     return false;
3935   auto PerParentName = PerFile->second.find(ParentName);
3936   if (PerParentName == PerFile->second.end())
3937     return false;
3938   auto PerLine = PerParentName->second.find(LineNum);
3939   if (PerLine == PerParentName->second.end())
3940     return false;
3941   // Fail if this entry is already registered.
3942   if (PerLine->second.getAddress() || PerLine->second.getID())
3943     return false;
3944   return true;
3945 }
3946 
3947 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3948     const OffloadTargetRegionEntryInfoActTy &Action) {
3949   // Scan all target region entries and perform the provided action.
3950   for (const auto &D : OffloadEntriesTargetRegion)
3951     for (const auto &F : D.second)
3952       for (const auto &P : F.second)
3953         for (const auto &L : P.second)
3954           Action(D.first, F.first, P.first(), L.first, L.second);
3955 }
3956 
3957 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3958     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3959                                        OMPTargetGlobalVarEntryKind Flags,
3960                                        unsigned Order) {
3961   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3962                                              "only required for the device "
3963                                              "code generation.");
3964   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3965   ++OffloadingEntriesNum;
3966 }
3967 
3968 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3969     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3970                                      CharUnits VarSize,
3971                                      OMPTargetGlobalVarEntryKind Flags,
3972                                      llvm::GlobalValue::LinkageTypes Linkage) {
3973   if (CGM.getLangOpts().OpenMPIsDevice) {
3974     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3975     assert(Entry.isValid() && Entry.getFlags() == Flags &&
3976            "Entry not initialized!");
3977     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3978            "Resetting with the new address.");
3979     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3980       if (Entry.getVarSize().isZero()) {
3981         Entry.setVarSize(VarSize);
3982         Entry.setLinkage(Linkage);
3983       }
3984       return;
3985     }
3986     Entry.setVarSize(VarSize);
3987     Entry.setLinkage(Linkage);
3988     Entry.setAddress(Addr);
3989   } else {
3990     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3991       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3992       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3993              "Entry not initialized!");
3994       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3995              "Resetting with the new address.");
3996       if (Entry.getVarSize().isZero()) {
3997         Entry.setVarSize(VarSize);
3998         Entry.setLinkage(Linkage);
3999       }
4000       return;
4001     }
4002     OffloadEntriesDeviceGlobalVar.try_emplace(
4003         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
4004     ++OffloadingEntriesNum;
4005   }
4006 }
4007 
4008 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4009     actOnDeviceGlobalVarEntriesInfo(
4010         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
4011   // Scan all target region entries and perform the provided action.
4012   for (const auto &E : OffloadEntriesDeviceGlobalVar)
4013     Action(E.getKey(), E.getValue());
4014 }
4015 
4016 void CGOpenMPRuntime::createOffloadEntry(
4017     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4018     llvm::GlobalValue::LinkageTypes Linkage) {
4019   StringRef Name = Addr->getName();
4020   llvm::Module &M = CGM.getModule();
4021   llvm::LLVMContext &C = M.getContext();
4022 
4023   // Create constant string with the name.
4024   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4025 
4026   std::string StringName = getName({"omp_offloading", "entry_name"});
4027   auto *Str = new llvm::GlobalVariable(
4028       M, StrPtrInit->getType(), /*isConstant=*/true,
4029       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4030   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4031 
4032   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4033                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4034                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4035                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4036                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4037   std::string EntryName = getName({"omp_offloading", "entry", ""});
4038   llvm::GlobalVariable *Entry = createGlobalStruct(
4039       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4040       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4041 
4042   // The entry has to be created in the section the linker expects it to be.
4043   Entry->setSection("omp_offloading_entries");
4044 }
4045 
4046 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
4047   // Emit the offloading entries and metadata so that the device codegen side
4048   // can easily figure out what to emit. The produced metadata looks like
4049   // this:
4050   //
4051   // !omp_offload.info = !{!1, ...}
4052   //
4053   // Right now we only generate metadata for function that contain target
4054   // regions.
4055 
4056   // If we are in simd mode or there are no entries, we don't need to do
4057   // anything.
4058   if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
4059     return;
4060 
4061   llvm::Module &M = CGM.getModule();
4062   llvm::LLVMContext &C = M.getContext();
4063   SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
4064                          SourceLocation, StringRef>,
4065               16>
4066       OrderedEntries(OffloadEntriesInfoManager.size());
4067   llvm::SmallVector<StringRef, 16> ParentFunctions(
4068       OffloadEntriesInfoManager.size());
4069 
4070   // Auxiliary methods to create metadata values and strings.
4071   auto &&GetMDInt = [this](unsigned V) {
4072     return llvm::ConstantAsMetadata::get(
4073         llvm::ConstantInt::get(CGM.Int32Ty, V));
4074   };
4075 
4076   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
4077 
4078   // Create the offloading info metadata node.
4079   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
4080 
4081   // Create function that emits metadata for each target region entry;
4082   auto &&TargetRegionMetadataEmitter =
4083       [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
4084        &GetMDString](
4085           unsigned DeviceID, unsigned FileID, StringRef ParentName,
4086           unsigned Line,
4087           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
4088         // Generate metadata for target regions. Each entry of this metadata
4089         // contains:
4090         // - Entry 0 -> Kind of this type of metadata (0).
4091         // - Entry 1 -> Device ID of the file where the entry was identified.
4092         // - Entry 2 -> File ID of the file where the entry was identified.
4093         // - Entry 3 -> Mangled name of the function where the entry was
4094         // identified.
4095         // - Entry 4 -> Line in the file where the entry was identified.
4096         // - Entry 5 -> Order the entry was created.
4097         // The first element of the metadata node is the kind.
4098         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
4099                                  GetMDInt(FileID),      GetMDString(ParentName),
4100                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
4101 
4102         SourceLocation Loc;
4103         for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
4104                   E = CGM.getContext().getSourceManager().fileinfo_end();
4105              I != E; ++I) {
4106           if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
4107               I->getFirst()->getUniqueID().getFile() == FileID) {
4108             Loc = CGM.getContext().getSourceManager().translateFileLineCol(
4109                 I->getFirst(), Line, 1);
4110             break;
4111           }
4112         }
4113         // Save this entry in the right position of the ordered entries array.
4114         OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
4115         ParentFunctions[E.getOrder()] = ParentName;
4116 
4117         // Add metadata to the named metadata node.
4118         MD->addOperand(llvm::MDNode::get(C, Ops));
4119       };
4120 
4121   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
4122       TargetRegionMetadataEmitter);
4123 
4124   // Create function that emits metadata for each device global variable entry;
4125   auto &&DeviceGlobalVarMetadataEmitter =
4126       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
4127        MD](StringRef MangledName,
4128            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
4129                &E) {
4130         // Generate metadata for global variables. Each entry of this metadata
4131         // contains:
4132         // - Entry 0 -> Kind of this type of metadata (1).
4133         // - Entry 1 -> Mangled name of the variable.
4134         // - Entry 2 -> Declare target kind.
4135         // - Entry 3 -> Order the entry was created.
4136         // The first element of the metadata node is the kind.
4137         llvm::Metadata *Ops[] = {
4138             GetMDInt(E.getKind()), GetMDString(MangledName),
4139             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
4140 
4141         // Save this entry in the right position of the ordered entries array.
4142         OrderedEntries[E.getOrder()] =
4143             std::make_tuple(&E, SourceLocation(), MangledName);
4144 
4145         // Add metadata to the named metadata node.
4146         MD->addOperand(llvm::MDNode::get(C, Ops));
4147       };
4148 
4149   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
4150       DeviceGlobalVarMetadataEmitter);
4151 
4152   for (const auto &E : OrderedEntries) {
4153     assert(std::get<0>(E) && "All ordered entries must exist!");
4154     if (const auto *CE =
4155             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
4156                 std::get<0>(E))) {
4157       if (!CE->getID() || !CE->getAddress()) {
4158         // Do not blame the entry if the parent funtion is not emitted.
4159         StringRef FnName = ParentFunctions[CE->getOrder()];
4160         if (!CGM.GetGlobalValue(FnName))
4161           continue;
4162         unsigned DiagID = CGM.getDiags().getCustomDiagID(
4163             DiagnosticsEngine::Error,
4164             "Offloading entry for target region in %0 is incorrect: either the "
4165             "address or the ID is invalid.");
4166         CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
4167         continue;
4168       }
4169       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
4170                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
4171     } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
4172                                              OffloadEntryInfoDeviceGlobalVar>(
4173                    std::get<0>(E))) {
4174       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
4175           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4176               CE->getFlags());
4177       switch (Flags) {
4178       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
4179         if (CGM.getLangOpts().OpenMPIsDevice &&
4180             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
4181           continue;
4182         if (!CE->getAddress()) {
4183           unsigned DiagID = CGM.getDiags().getCustomDiagID(
4184               DiagnosticsEngine::Error, "Offloading entry for declare target "
4185                                         "variable %0 is incorrect: the "
4186                                         "address is invalid.");
4187           CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
4188           continue;
4189         }
4190         // The vaiable has no definition - no need to add the entry.
4191         if (CE->getVarSize().isZero())
4192           continue;
4193         break;
4194       }
4195       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
4196         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
4197                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
4198                "Declaret target link address is set.");
4199         if (CGM.getLangOpts().OpenMPIsDevice)
4200           continue;
4201         if (!CE->getAddress()) {
4202           unsigned DiagID = CGM.getDiags().getCustomDiagID(
4203               DiagnosticsEngine::Error,
4204               "Offloading entry for declare target variable is incorrect: the "
4205               "address is invalid.");
4206           CGM.getDiags().Report(DiagID);
4207           continue;
4208         }
4209         break;
4210       }
4211       createOffloadEntry(CE->getAddress(), CE->getAddress(),
4212                          CE->getVarSize().getQuantity(), Flags,
4213                          CE->getLinkage());
4214     } else {
4215       llvm_unreachable("Unsupported entry kind.");
4216     }
4217   }
4218 }
4219 
4220 /// Loads all the offload entries information from the host IR
4221 /// metadata.
4222 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4223   // If we are in target mode, load the metadata from the host IR. This code has
4224   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
4225 
4226   if (!CGM.getLangOpts().OpenMPIsDevice)
4227     return;
4228 
4229   if (CGM.getLangOpts().OMPHostIRFile.empty())
4230     return;
4231 
4232   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4233   if (auto EC = Buf.getError()) {
4234     CGM.getDiags().Report(diag::err_cannot_open_file)
4235         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4236     return;
4237   }
4238 
4239   llvm::LLVMContext C;
4240   auto ME = expectedToErrorOrAndEmitErrors(
4241       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4242 
4243   if (auto EC = ME.getError()) {
4244     unsigned DiagID = CGM.getDiags().getCustomDiagID(
4245         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4246     CGM.getDiags().Report(DiagID)
4247         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4248     return;
4249   }
4250 
4251   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4252   if (!MD)
4253     return;
4254 
4255   for (llvm::MDNode *MN : MD->operands()) {
4256     auto &&GetMDInt = [MN](unsigned Idx) {
4257       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4258       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4259     };
4260 
4261     auto &&GetMDString = [MN](unsigned Idx) {
4262       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4263       return V->getString();
4264     };
4265 
4266     switch (GetMDInt(0)) {
4267     default:
4268       llvm_unreachable("Unexpected metadata!");
4269       break;
4270     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4271         OffloadingEntryInfoTargetRegion:
4272       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4273           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4274           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4275           /*Order=*/GetMDInt(5));
4276       break;
4277     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4278         OffloadingEntryInfoDeviceGlobalVar:
4279       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4280           /*MangledName=*/GetMDString(1),
4281           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4282               /*Flags=*/GetMDInt(2)),
4283           /*Order=*/GetMDInt(3));
4284       break;
4285     }
4286   }
4287 }
4288 
4289 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4290   if (!KmpRoutineEntryPtrTy) {
4291     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4292     ASTContext &C = CGM.getContext();
4293     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4294     FunctionProtoType::ExtProtoInfo EPI;
4295     KmpRoutineEntryPtrQTy = C.getPointerType(
4296         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4297     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4298   }
4299 }
4300 
4301 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4302   // Make sure the type of the entry is already created. This is the type we
4303   // have to create:
4304   // struct __tgt_offload_entry{
4305   //   void      *addr;       // Pointer to the offload entry info.
4306   //                          // (function or global)
4307   //   char      *name;       // Name of the function or global.
4308   //   size_t     size;       // Size of the entry info (0 if it a function).
4309   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4310   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4311   // };
4312   if (TgtOffloadEntryQTy.isNull()) {
4313     ASTContext &C = CGM.getContext();
4314     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4315     RD->startDefinition();
4316     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4317     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4318     addFieldToRecordDecl(C, RD, C.getSizeType());
4319     addFieldToRecordDecl(
4320         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4321     addFieldToRecordDecl(
4322         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4323     RD->completeDefinition();
4324     RD->addAttr(PackedAttr::CreateImplicit(C));
4325     TgtOffloadEntryQTy = C.getRecordType(RD);
4326   }
4327   return TgtOffloadEntryQTy;
4328 }
4329 
4330 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4331   // These are the types we need to build:
4332   // struct __tgt_device_image{
4333   // void   *ImageStart;       // Pointer to the target code start.
4334   // void   *ImageEnd;         // Pointer to the target code end.
4335   // // We also add the host entries to the device image, as it may be useful
4336   // // for the target runtime to have access to that information.
4337   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
4338   //                                       // the entries.
4339   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4340   //                                       // entries (non inclusive).
4341   // };
4342   if (TgtDeviceImageQTy.isNull()) {
4343     ASTContext &C = CGM.getContext();
4344     RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4345     RD->startDefinition();
4346     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4347     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4348     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4349     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4350     RD->completeDefinition();
4351     TgtDeviceImageQTy = C.getRecordType(RD);
4352   }
4353   return TgtDeviceImageQTy;
4354 }
4355 
4356 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4357   // struct __tgt_bin_desc{
4358   //   int32_t              NumDevices;      // Number of devices supported.
4359   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
4360   //                                         // (one per device).
4361   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
4362   //                                         // entries.
4363   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4364   //                                         // entries (non inclusive).
4365   // };
4366   if (TgtBinaryDescriptorQTy.isNull()) {
4367     ASTContext &C = CGM.getContext();
4368     RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4369     RD->startDefinition();
4370     addFieldToRecordDecl(
4371         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4372     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
4373     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4374     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4375     RD->completeDefinition();
4376     TgtBinaryDescriptorQTy = C.getRecordType(RD);
4377   }
4378   return TgtBinaryDescriptorQTy;
4379 }
4380 
4381 namespace {
4382 struct PrivateHelpersTy {
4383   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4384                    const VarDecl *PrivateElemInit)
4385       : Original(Original), PrivateCopy(PrivateCopy),
4386         PrivateElemInit(PrivateElemInit) {}
4387   const VarDecl *Original;
4388   const VarDecl *PrivateCopy;
4389   const VarDecl *PrivateElemInit;
4390 };
4391 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4392 } // anonymous namespace
4393 
4394 static RecordDecl *
4395 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4396   if (!Privates.empty()) {
4397     ASTContext &C = CGM.getContext();
4398     // Build struct .kmp_privates_t. {
4399     //         /*  private vars  */
4400     //       };
4401     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4402     RD->startDefinition();
4403     for (const auto &Pair : Privates) {
4404       const VarDecl *VD = Pair.second.Original;
4405       QualType Type = VD->getType().getNonReferenceType();
4406       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4407       if (VD->hasAttrs()) {
4408         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4409              E(VD->getAttrs().end());
4410              I != E; ++I)
4411           FD->addAttr(*I);
4412       }
4413     }
4414     RD->completeDefinition();
4415     return RD;
4416   }
4417   return nullptr;
4418 }
4419 
4420 static RecordDecl *
4421 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4422                          QualType KmpInt32Ty,
4423                          QualType KmpRoutineEntryPointerQTy) {
4424   ASTContext &C = CGM.getContext();
4425   // Build struct kmp_task_t {
4426   //         void *              shareds;
4427   //         kmp_routine_entry_t routine;
4428   //         kmp_int32           part_id;
4429   //         kmp_cmplrdata_t data1;
4430   //         kmp_cmplrdata_t data2;
4431   // For taskloops additional fields:
4432   //         kmp_uint64          lb;
4433   //         kmp_uint64          ub;
4434   //         kmp_int64           st;
4435   //         kmp_int32           liter;
4436   //         void *              reductions;
4437   //       };
4438   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4439   UD->startDefinition();
4440   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4441   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4442   UD->completeDefinition();
4443   QualType KmpCmplrdataTy = C.getRecordType(UD);
4444   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4445   RD->startDefinition();
4446   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4447   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4448   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4449   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4450   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4451   if (isOpenMPTaskLoopDirective(Kind)) {
4452     QualType KmpUInt64Ty =
4453         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4454     QualType KmpInt64Ty =
4455         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4456     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4457     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4458     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4459     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4460     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4461   }
4462   RD->completeDefinition();
4463   return RD;
4464 }
4465 
4466 static RecordDecl *
4467 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4468                                      ArrayRef<PrivateDataTy> Privates) {
4469   ASTContext &C = CGM.getContext();
4470   // Build struct kmp_task_t_with_privates {
4471   //         kmp_task_t task_data;
4472   //         .kmp_privates_t. privates;
4473   //       };
4474   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4475   RD->startDefinition();
4476   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4477   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4478     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4479   RD->completeDefinition();
4480   return RD;
4481 }
4482 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
///
/// The proxy is created with internal linkage and marked as non-recursive.
/// It unpacks the task descriptor, forwards the pieces to \p TaskFunction and
/// stores 0 as its return value.
///
/// \param CGM Module the proxy is emitted into.
/// \param Loc Location used for the implicit parameters and emitted code.
/// \param Kind Directive kind; taskloop directives pass five extra arguments.
/// \param KmpInt32Ty Type of the gtid parameter and of the return value.
/// \param KmpTaskTWithPrivatesPtrQTy Pointer type of the second parameter.
/// \param KmpTaskTWithPrivatesQTy Record type wrapping kmp_task_t and the
/// optional privates.
/// \param KmpTaskTQTy The kmp_task_t record type.
/// \param SharedsPtrTy Type the loaded shareds pointer is cast to.
/// \param TaskFunction The outlined function that is called.
/// \param TaskPrivatesMap Value forwarded as the fourth call argument.
/// \returns The newly created proxy function.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Declare the two implicit parameters: the gtid and a restrict-qualified
  // pointer to the task descriptor.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  // Create the function itself.
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.reductions,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is the whole descriptor the second parameter points to.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Base is the first field of the descriptor, i.e. the kmp_task_t part.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // The part id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer();

  // Load the shareds pointer and cast it to the expected pointer type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates are the optional second field of the descriptor; pass their
  // address as void*, or a null pointer if the field does not exist.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to all directive kinds; the last one is the descriptor
  // itself as void*.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass the loaded lower bound, upper bound, stride,
    // last-iteration flag and reductions pointer, in that order.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  // The shareds pointer always comes last.
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4597 
4598 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4599                                             SourceLocation Loc,
4600                                             QualType KmpInt32Ty,
4601                                             QualType KmpTaskTWithPrivatesPtrQTy,
4602                                             QualType KmpTaskTWithPrivatesQTy) {
4603   ASTContext &C = CGM.getContext();
4604   FunctionArgList Args;
4605   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4606                             ImplicitParamDecl::Other);
4607   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4608                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4609                                 ImplicitParamDecl::Other);
4610   Args.push_back(&GtidArg);
4611   Args.push_back(&TaskTypeArg);
4612   const auto &DestructorFnInfo =
4613       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4614   llvm::FunctionType *DestructorFnTy =
4615       CGM.getTypes().GetFunctionType(DestructorFnInfo);
4616   std::string Name =
4617       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4618   auto *DestructorFn =
4619       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4620                              Name, &CGM.getModule());
4621   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4622                                     DestructorFnInfo);
4623   DestructorFn->setDoesNotRecurse();
4624   CodeGenFunction CGF(CGM);
4625   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4626                     Args, Loc, Loc);
4627 
4628   LValue Base = CGF.EmitLoadOfPointerLValue(
4629       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4630       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4631   const auto *KmpTaskTWithPrivatesQTyRD =
4632       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4633   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4634   Base = CGF.EmitLValueForField(Base, *FI);
4635   for (const auto *Field :
4636        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4637     if (QualType::DestructionKind DtorKind =
4638             Field->getType().isDestructedType()) {
4639       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4640       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
4641     }
4642   }
4643   CGF.FinishFunction();
4644   return DestructorFn;
4645 }
4646 
4647 /// Emit a privates mapping function for correct handling of private and
4648 /// firstprivate variables.
4649 /// \code
4650 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4651 /// **noalias priv1,...,  <tyn> **noalias privn) {
4652 ///   *priv1 = &.privates.priv1;
4653 ///   ...;
4654 ///   *privn = &.privates.privn;
4655 /// }
4656 /// \endcode
4657 static llvm::Value *
4658 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4659                                ArrayRef<const Expr *> PrivateVars,
4660                                ArrayRef<const Expr *> FirstprivateVars,
4661                                ArrayRef<const Expr *> LastprivateVars,
4662                                QualType PrivatesQTy,
4663                                ArrayRef<PrivateDataTy> Privates) {
4664   ASTContext &C = CGM.getContext();
4665   FunctionArgList Args;
4666   ImplicitParamDecl TaskPrivatesArg(
4667       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4668       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4669       ImplicitParamDecl::Other);
4670   Args.push_back(&TaskPrivatesArg);
4671   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4672   unsigned Counter = 1;
4673   for (const Expr *E : PrivateVars) {
4674     Args.push_back(ImplicitParamDecl::Create(
4675         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4676         C.getPointerType(C.getPointerType(E->getType()))
4677             .withConst()
4678             .withRestrict(),
4679         ImplicitParamDecl::Other));
4680     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4681     PrivateVarsPos[VD] = Counter;
4682     ++Counter;
4683   }
4684   for (const Expr *E : FirstprivateVars) {
4685     Args.push_back(ImplicitParamDecl::Create(
4686         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4687         C.getPointerType(C.getPointerType(E->getType()))
4688             .withConst()
4689             .withRestrict(),
4690         ImplicitParamDecl::Other));
4691     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4692     PrivateVarsPos[VD] = Counter;
4693     ++Counter;
4694   }
4695   for (const Expr *E : LastprivateVars) {
4696     Args.push_back(ImplicitParamDecl::Create(
4697         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4698         C.getPointerType(C.getPointerType(E->getType()))
4699             .withConst()
4700             .withRestrict(),
4701         ImplicitParamDecl::Other));
4702     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4703     PrivateVarsPos[VD] = Counter;
4704     ++Counter;
4705   }
4706   const auto &TaskPrivatesMapFnInfo =
4707       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4708   llvm::FunctionType *TaskPrivatesMapTy =
4709       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4710   std::string Name =
4711       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4712   auto *TaskPrivatesMap = llvm::Function::Create(
4713       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4714       &CGM.getModule());
4715   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4716                                     TaskPrivatesMapFnInfo);
4717   if (CGM.getLangOpts().Optimize) {
4718     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4719     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4720     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4721   }
4722   CodeGenFunction CGF(CGM);
4723   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4724                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4725 
4726   // *privi = &.privates.privi;
4727   LValue Base = CGF.EmitLoadOfPointerLValue(
4728       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4729       TaskPrivatesArg.getType()->castAs<PointerType>());
4730   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4731   Counter = 0;
4732   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4733     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4734     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4735     LValue RefLVal =
4736         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4737     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4738         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4739     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4740     ++Counter;
4741   }
4742   CGF.FinishFunction();
4743   return TaskPrivatesMap;
4744 }
4745 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lock-step with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD) and emits the initializer of
/// each private copy into the matching field reached through \p TDBase.
///
/// \param D Directive whose captured statement ('taskloop' for taskloop
/// directives, 'task' otherwise) is used to look up captured variables.
/// \param KmpTaskSharedsPtr Address of the shareds block that firstprivate
/// initializers read their source values from. For target tasks it is used
/// only when valid; for other tasks it is used whenever \p Data has
/// firstprivate variables.
/// \param TDBase LValue of the task record whose privates field is filled in.
/// \param SharedsTy Type of the shareds record.
/// \param SharedsPtrTy Pointer type to which \p KmpTaskSharedsPtr is cast.
/// \param Privates Private copies, in the order of the privates record fields.
/// \param ForDup When true, only copies whose initializer is a non-trivial
/// CXXConstructExpr are emitted; when false, every copy that has an
/// initializer is emitted.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of the task-with-privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  // Base of the shareds record; stays default-constructed unless the condition
  // below holds.
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself and
  // is advanced once per entry of Privates, whether or not anything is emitted.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function (ForDup) only non-trivial constructor calls are
    // emitted; otherwise any initializer is emitted.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit marks a copy whose initializer reads the
      // original (shared) value through the helper variable Elem.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          // Not captured: use the address of the local variable directly.
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          // Captured: read from the corresponding field of the shareds record,
          // re-wrapped with the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Map Elem to the current source element for the duration of
                  // this element's initializer.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array copy: map Elem to the shared value and emit the
          // initializer directly into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No element-init helper: the initializer does not read the original
        // variable through Elem, so emit it as is.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
4850 
4851 /// Check if duplication function is required for taskloops.
4852 static bool checkInitIsRequired(CodeGenFunction &CGF,
4853                                 ArrayRef<PrivateDataTy> Privates) {
4854   bool InitRequired = false;
4855   for (const PrivateDataTy &Pair : Privates) {
4856     const VarDecl *VD = Pair.second.PrivateCopy;
4857     const Expr *Init = VD->getAnyInitializer();
4858     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4859                                     !CGF.isTrivialInitializer(Init));
4860     if (InitRequired)
4861       break;
4862   }
4863   return InitRequired;
4864 }
4865 
4866 
4867 /// Emit task_dup function (for initialization of
4868 /// private/firstprivate/lastprivate vars and last_iter flag)
4869 /// \code
4870 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4871 /// lastpriv) {
4872 /// // setup lastprivate flag
4873 ///    task_dst->last = lastpriv;
4874 /// // could be constructor calls here...
4875 /// }
4876 /// \endcode
4877 static llvm::Value *
4878 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4879                     const OMPExecutableDirective &D,
4880                     QualType KmpTaskTWithPrivatesPtrQTy,
4881                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4882                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4883                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4884                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4885   ASTContext &C = CGM.getContext();
4886   FunctionArgList Args;
4887   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4888                            KmpTaskTWithPrivatesPtrQTy,
4889                            ImplicitParamDecl::Other);
4890   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4891                            KmpTaskTWithPrivatesPtrQTy,
4892                            ImplicitParamDecl::Other);
4893   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4894                                 ImplicitParamDecl::Other);
4895   Args.push_back(&DstArg);
4896   Args.push_back(&SrcArg);
4897   Args.push_back(&LastprivArg);
4898   const auto &TaskDupFnInfo =
4899       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4900   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4901   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4902   auto *TaskDup = llvm::Function::Create(
4903       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4904   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4905   TaskDup->setDoesNotRecurse();
4906   CodeGenFunction CGF(CGM);
4907   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4908                     Loc);
4909 
4910   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4911       CGF.GetAddrOfLocalVar(&DstArg),
4912       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4913   // task_dst->liter = lastpriv;
4914   if (WithLastIter) {
4915     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4916     LValue Base = CGF.EmitLValueForField(
4917         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4918     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4919     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4920         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4921     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4922   }
4923 
4924   // Emit initial values for private copies (if any).
4925   assert(!Privates.empty());
4926   Address KmpTaskSharedsPtr = Address::invalid();
4927   if (!Data.FirstprivateVars.empty()) {
4928     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4929         CGF.GetAddrOfLocalVar(&SrcArg),
4930         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4931     LValue Base = CGF.EmitLValueForField(
4932         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4933     KmpTaskSharedsPtr = Address(
4934         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4935                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4936                                                   KmpTaskTShareds)),
4937                              Loc),
4938         CGF.getNaturalTypeAlignment(SharedsTy));
4939   }
4940   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4941                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4942   CGF.FinishFunction();
4943   return TaskDup;
4944 }
4945 
4946 /// Checks if destructor function is required to be generated.
4947 /// \return true if cleanups are required, false otherwise.
4948 static bool
4949 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4950   bool NeedsCleanup = false;
4951   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4952   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4953   for (const FieldDecl *FD : PrivateRD->fields()) {
4954     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4955     if (NeedsCleanup)
4956       break;
4957   }
4958   return NeedsCleanup;
4959 }
4960 
/// Emit the allocation and initialization of a task object for directive
/// \p D.
///
/// In order, this:
///  - gathers the private, firstprivate and lastprivate copies from \p Data
///    and stably sorts them by decreasing declared alignment;
///  - builds (or reuses the saved) kmp_task_t record type, kept separately for
///    taskloop directives, and the per-task record that adds the privates;
///  - emits the privates mapping function and the proxy task entry;
///  - emits the call to __kmpc_omp_task_alloc, or to
///    __kmpc_omp_target_task_alloc with a device ID when \p D has a 'nowait'
///    clause;
///  - copies the shareds, initializes the private copies, and for taskloops
///    emits the task duplication function when needed;
///  - stores the destructors function and the priority into the task record
///    when they are required.
///
/// \param TaskFunction Outlined task function; the type of its fourth argument
/// is used as the type of the privates mapping function pointer.
/// \param SharedsTy Record type holding the shared variables.
/// \param Shareds Address of the shareds record to copy into the new task.
/// \return The pieces later codegen needs: the allocated task, the task entry,
/// the task pointer cast to the per-task record type, the kmp_task_t base
/// lvalue, the kmp_task_t record declaration and, if emitted, the task
/// duplication function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the helper variable used by their
  // initializer to refer to the original value.
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Largest alignment first; the stable sort keeps the clause order among
  // entries of equal alignment.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit the privates mapping function (if there are any privates) and cast it
  // to the type of the fourth argument of the task function; otherwise use a
  // null pointer of that type.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  // Flags known at compile time are collected in 'Flags'; the 'final' flag may
  // depend on a runtime condition and is merged in below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // If the 'final' condition is a runtime value, select the flag at runtime;
  // otherwise fold the compile-time result.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a 'nowait' clause the target-task allocation entry point is used; it
  // takes the device ID as an extra trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    // Note: this 'C' is the device clause and shadows the ASTContext 'C' inside
    // the if statement.
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  // View the allocated task as the per-task record (kmp_task_t plus privates).
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  // TDBase is the kmp_task_t part, i.e. the first field of the per-task record.
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    // Load the shareds pointer stored in the task and copy the caller's
    // shareds record into the memory it points to.
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops need a dup function when there are lastprivates (to set the
    // last-iteration flag) or when some copy has a non-trivial constructor.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Look up the union type of the 'data1' field; the same union declaration is
  // used below to address the member of 'data2'.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  // Provide pointer to function with destructors for privates.
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  // Hand back everything the callers need to finish emitting the task.
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
5180 
5181 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5182                                    const OMPExecutableDirective &D,
5183                                    llvm::Function *TaskFunction,
5184                                    QualType SharedsTy, Address Shareds,
5185                                    const Expr *IfCond,
5186                                    const OMPTaskDataTy &Data) {
5187   if (!CGF.HaveInsertPoint())
5188     return;
5189 
5190   TaskResultTy Result =
5191       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5192   llvm::Value *NewTask = Result.NewTask;
5193   llvm::Function *TaskEntry = Result.TaskEntry;
5194   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5195   LValue TDBase = Result.TDBase;
5196   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5197   ASTContext &C = CGM.getContext();
5198   // Process list of dependences.
5199   Address DependenciesArray = Address::invalid();
5200   unsigned NumDependencies = Data.Dependences.size();
5201   if (NumDependencies) {
5202     // Dependence kind for RTL.
5203     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
5204     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
5205     RecordDecl *KmpDependInfoRD;
5206     QualType FlagsTy =
5207         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5208     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5209     if (KmpDependInfoTy.isNull()) {
5210       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5211       KmpDependInfoRD->startDefinition();
5212       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5213       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5214       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5215       KmpDependInfoRD->completeDefinition();
5216       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5217     } else {
5218       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5219     }
5220     // Define type kmp_depend_info[<Dependences.size()>];
5221     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5222         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
5223         nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5224     // kmp_depend_info[<Dependences.size()>] deps;
5225     DependenciesArray =
5226         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5227     for (unsigned I = 0; I < NumDependencies; ++I) {
5228       const Expr *E = Data.Dependences[I].second;
5229       LValue Addr = CGF.EmitLValue(E);
5230       llvm::Value *Size;
5231       QualType Ty = E->getType();
5232       if (const auto *ASE =
5233               dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5234         LValue UpAddrLVal =
5235             CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
5236         llvm::Value *UpAddr =
5237             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
5238         llvm::Value *LowIntPtr =
5239             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
5240         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5241         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5242       } else {
5243         Size = CGF.getTypeSize(Ty);
5244       }
5245       LValue Base = CGF.MakeAddrLValue(
5246           CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
5247           KmpDependInfoTy);
5248       // deps[i].base_addr = &<Dependences[i].second>;
5249       LValue BaseAddrLVal = CGF.EmitLValueForField(
5250           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5251       CGF.EmitStoreOfScalar(
5252           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
5253           BaseAddrLVal);
5254       // deps[i].len = sizeof(<Dependences[i].second>);
5255       LValue LenLVal = CGF.EmitLValueForField(
5256           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5257       CGF.EmitStoreOfScalar(Size, LenLVal);
5258       // deps[i].flags = <Dependences[i].first>;
5259       RTLDependenceKindTy DepKind;
5260       switch (Data.Dependences[I].first) {
5261       case OMPC_DEPEND_in:
5262         DepKind = DepIn;
5263         break;
5264       // Out and InOut dependencies must use the same code.
5265       case OMPC_DEPEND_out:
5266       case OMPC_DEPEND_inout:
5267         DepKind = DepInOut;
5268         break;
5269       case OMPC_DEPEND_mutexinoutset:
5270         DepKind = DepMutexInOutSet;
5271         break;
5272       case OMPC_DEPEND_source:
5273       case OMPC_DEPEND_sink:
5274       case OMPC_DEPEND_unknown:
5275         llvm_unreachable("Unknown task dependence type");
5276       }
5277       LValue FlagsLVal = CGF.EmitLValueForField(
5278           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5279       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5280                             FlagsLVal);
5281     }
5282     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5283         CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
5284   }
5285 
5286   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5287   // libcall.
5288   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5289   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5290   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5291   // list is not empty
5292   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5293   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5294   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5295   llvm::Value *DepTaskArgs[7];
5296   if (NumDependencies) {
5297     DepTaskArgs[0] = UpLoc;
5298     DepTaskArgs[1] = ThreadID;
5299     DepTaskArgs[2] = NewTask;
5300     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
5301     DepTaskArgs[4] = DependenciesArray.getPointer();
5302     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5303     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5304   }
5305   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
5306                         &TaskArgs,
5307                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5308     if (!Data.Tied) {
5309       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5310       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5311       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5312     }
5313     if (NumDependencies) {
5314       CGF.EmitRuntimeCall(
5315           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5316     } else {
5317       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5318                           TaskArgs);
5319     }
5320     // Check if parent region is untied and build return for untied task;
5321     if (auto *Region =
5322             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5323       Region->emitUntiedSwitch(CGF);
5324   };
5325 
5326   llvm::Value *DepWaitTaskArgs[6];
5327   if (NumDependencies) {
5328     DepWaitTaskArgs[0] = UpLoc;
5329     DepWaitTaskArgs[1] = ThreadID;
5330     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
5331     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5332     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5333     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5334   }
5335   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5336                         NumDependencies, &DepWaitTaskArgs,
5337                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5338     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5339     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5340     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5341     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5342     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5343     // is specified.
5344     if (NumDependencies)
5345       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5346                           DepWaitTaskArgs);
5347     // Call proxy_task_entry(gtid, new_task);
5348     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5349                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5350       Action.Enter(CGF);
5351       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5352       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5353                                                           OutlinedFnArgs);
5354     };
5355 
5356     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5357     // kmp_task_t *new_task);
5358     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5359     // kmp_task_t *new_task);
5360     RegionCodeGenTy RCG(CodeGen);
5361     CommonActionTy Action(
5362         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5363         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5364     RCG.setAction(Action);
5365     RCG(CGF);
5366   };
5367 
5368   if (IfCond) {
5369     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5370   } else {
5371     RegionCodeGenTy ThenRCG(ThenCodeGen);
5372     ThenRCG(CGF);
5373   }
5374 }
5375 
5376 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5377                                        const OMPLoopDirective &D,
5378                                        llvm::Function *TaskFunction,
5379                                        QualType SharedsTy, Address Shareds,
5380                                        const Expr *IfCond,
5381                                        const OMPTaskDataTy &Data) {
5382   if (!CGF.HaveInsertPoint())
5383     return;
5384   TaskResultTy Result =
5385       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5386   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5387   // libcall.
5388   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5389   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5390   // sched, kmp_uint64 grainsize, void *task_dup);
5391   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5392   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5393   llvm::Value *IfVal;
5394   if (IfCond) {
5395     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5396                                       /*isSigned=*/true);
5397   } else {
5398     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5399   }
5400 
5401   LValue LBLVal = CGF.EmitLValueForField(
5402       Result.TDBase,
5403       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5404   const auto *LBVar =
5405       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5406   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
5407                        /*IsInitializer=*/true);
5408   LValue UBLVal = CGF.EmitLValueForField(
5409       Result.TDBase,
5410       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5411   const auto *UBVar =
5412       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5413   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
5414                        /*IsInitializer=*/true);
5415   LValue StLVal = CGF.EmitLValueForField(
5416       Result.TDBase,
5417       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5418   const auto *StVar =
5419       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5420   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
5421                        /*IsInitializer=*/true);
5422   // Store reductions address.
5423   LValue RedLVal = CGF.EmitLValueForField(
5424       Result.TDBase,
5425       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5426   if (Data.Reductions) {
5427     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5428   } else {
5429     CGF.EmitNullInitialization(RedLVal.getAddress(),
5430                                CGF.getContext().VoidPtrTy);
5431   }
5432   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5433   llvm::Value *TaskArgs[] = {
5434       UpLoc,
5435       ThreadID,
5436       Result.NewTask,
5437       IfVal,
5438       LBLVal.getPointer(),
5439       UBLVal.getPointer(),
5440       CGF.EmitLoadOfScalar(StLVal, Loc),
5441       llvm::ConstantInt::getSigned(
5442               CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5443       llvm::ConstantInt::getSigned(
5444           CGF.IntTy, Data.Schedule.getPointer()
5445                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5446                          : NoSchedule),
5447       Data.Schedule.getPointer()
5448           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5449                                       /*isSigned=*/false)
5450           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5451       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5452                              Result.TaskDupFn, CGF.VoidPtrTy)
5453                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5454   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
5455 }
5456 
5457 /// Emit reduction operation for each element of array (required for
5458 /// array sections) LHS op = RHS.
5459 /// \param Type Type of array.
5460 /// \param LHSVar Variable on the left side of the reduction operation
5461 /// (references element of array in original variable).
5462 /// \param RHSVar Variable on the right side of the reduction operation
5463 /// (references element of array in original variable).
5464 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5465 /// RHSVar.
5466 static void EmitOMPAggregateReduction(
5467     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5468     const VarDecl *RHSVar,
5469     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5470                                   const Expr *, const Expr *)> &RedOpGen,
5471     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5472     const Expr *UpExpr = nullptr) {
5473   // Perform element-by-element initialization.
5474   QualType ElementTy;
5475   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5476   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5477 
5478   // Drill down to the base element type on both arrays.
5479   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5480   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5481 
5482   llvm::Value *RHSBegin = RHSAddr.getPointer();
5483   llvm::Value *LHSBegin = LHSAddr.getPointer();
5484   // Cast from pointer to array type to pointer to single element.
5485   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5486   // The basic structure here is a while-do loop.
5487   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5488   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5489   llvm::Value *IsEmpty =
5490       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5491   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5492 
5493   // Enter the loop body, making that address the current address.
5494   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5495   CGF.EmitBlock(BodyBB);
5496 
5497   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5498 
5499   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5500       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5501   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5502   Address RHSElementCurrent =
5503       Address(RHSElementPHI,
5504               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5505 
5506   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5507       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5508   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5509   Address LHSElementCurrent =
5510       Address(LHSElementPHI,
5511               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5512 
5513   // Emit copy.
5514   CodeGenFunction::OMPPrivateScope Scope(CGF);
5515   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5516   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5517   Scope.Privatize();
5518   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5519   Scope.ForceCleanup();
5520 
5521   // Shift the address forward by one element.
5522   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5523       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5524   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5525       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5526   // Check whether we've reached the end.
5527   llvm::Value *Done =
5528       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5529   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5530   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5531   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5532 
5533   // Done.
5534   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5535 }
5536 
5537 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5538 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5539 /// UDR combiner function.
5540 static void emitReductionCombiner(CodeGenFunction &CGF,
5541                                   const Expr *ReductionOp) {
5542   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5543     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5544       if (const auto *DRE =
5545               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5546         if (const auto *DRD =
5547                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5548           std::pair<llvm::Function *, llvm::Function *> Reduction =
5549               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5550           RValue Func = RValue::get(Reduction.first);
5551           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5552           CGF.EmitIgnoredExpr(ReductionOp);
5553           return;
5554         }
5555   CGF.EmitIgnoredExpr(ReductionOp);
5556 }
5557 
5558 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5559     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5560     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5561     ArrayRef<const Expr *> ReductionOps) {
5562   ASTContext &C = CGM.getContext();
5563 
5564   // void reduction_func(void *LHSArg, void *RHSArg);
5565   FunctionArgList Args;
5566   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5567                            ImplicitParamDecl::Other);
5568   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5569                            ImplicitParamDecl::Other);
5570   Args.push_back(&LHSArg);
5571   Args.push_back(&RHSArg);
5572   const auto &CGFI =
5573       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5574   std::string Name = getName({"omp", "reduction", "reduction_func"});
5575   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5576                                     llvm::GlobalValue::InternalLinkage, Name,
5577                                     &CGM.getModule());
5578   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5579   Fn->setDoesNotRecurse();
5580   CodeGenFunction CGF(CGM);
5581   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5582 
5583   // Dst = (void*[n])(LHSArg);
5584   // Src = (void*[n])(RHSArg);
5585   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5586       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5587       ArgsType), CGF.getPointerAlign());
5588   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5589       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5590       ArgsType), CGF.getPointerAlign());
5591 
5592   //  ...
5593   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5594   //  ...
5595   CodeGenFunction::OMPPrivateScope Scope(CGF);
5596   auto IPriv = Privates.begin();
5597   unsigned Idx = 0;
5598   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5599     const auto *RHSVar =
5600         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5601     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5602       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5603     });
5604     const auto *LHSVar =
5605         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5606     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5607       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5608     });
5609     QualType PrivTy = (*IPriv)->getType();
5610     if (PrivTy->isVariablyModifiedType()) {
5611       // Get array size and emit VLA type.
5612       ++Idx;
5613       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5614       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5615       const VariableArrayType *VLA =
5616           CGF.getContext().getAsVariableArrayType(PrivTy);
5617       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5618       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5619           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5620       CGF.EmitVariablyModifiedType(PrivTy);
5621     }
5622   }
5623   Scope.Privatize();
5624   IPriv = Privates.begin();
5625   auto ILHS = LHSExprs.begin();
5626   auto IRHS = RHSExprs.begin();
5627   for (const Expr *E : ReductionOps) {
5628     if ((*IPriv)->getType()->isArrayType()) {
5629       // Emit reduction for array section.
5630       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5631       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5632       EmitOMPAggregateReduction(
5633           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5634           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5635             emitReductionCombiner(CGF, E);
5636           });
5637     } else {
5638       // Emit reduction for array subscript or single variable.
5639       emitReductionCombiner(CGF, E);
5640     }
5641     ++IPriv;
5642     ++ILHS;
5643     ++IRHS;
5644   }
5645   Scope.ForceCleanup();
5646   CGF.FinishFunction();
5647   return Fn;
5648 }
5649 
5650 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5651                                                   const Expr *ReductionOp,
5652                                                   const Expr *PrivateRef,
5653                                                   const DeclRefExpr *LHS,
5654                                                   const DeclRefExpr *RHS) {
5655   if (PrivateRef->getType()->isArrayType()) {
5656     // Emit reduction for array section.
5657     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5658     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5659     EmitOMPAggregateReduction(
5660         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5661         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5662           emitReductionCombiner(CGF, ReductionOp);
5663         });
5664   } else {
5665     // Emit reduction for array subscript or single variable.
5666     emitReductionCombiner(CGF, ReductionOp);
5667   }
5668 }
5669 
5670 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5671                                     ArrayRef<const Expr *> Privates,
5672                                     ArrayRef<const Expr *> LHSExprs,
5673                                     ArrayRef<const Expr *> RHSExprs,
5674                                     ArrayRef<const Expr *> ReductionOps,
5675                                     ReductionOptionsTy Options) {
5676   if (!CGF.HaveInsertPoint())
5677     return;
5678 
5679   bool WithNowait = Options.WithNowait;
5680   bool SimpleReduction = Options.SimpleReduction;
5681 
5682   // Next code should be emitted for reduction:
5683   //
5684   // static kmp_critical_name lock = { 0 };
5685   //
5686   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5687   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5688   //  ...
5689   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5690   //  *(Type<n>-1*)rhs[<n>-1]);
5691   // }
5692   //
5693   // ...
5694   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5695   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5696   // RedList, reduce_func, &<lock>)) {
5697   // case 1:
5698   //  ...
5699   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5700   //  ...
5701   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5702   // break;
5703   // case 2:
5704   //  ...
5705   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5706   //  ...
5707   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5708   // break;
5709   // default:;
5710   // }
5711   //
5712   // if SimpleReduction is true, only the next code is generated:
5713   //  ...
5714   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5715   //  ...
5716 
5717   ASTContext &C = CGM.getContext();
5718 
5719   if (SimpleReduction) {
5720     CodeGenFunction::RunCleanupsScope Scope(CGF);
5721     auto IPriv = Privates.begin();
5722     auto ILHS = LHSExprs.begin();
5723     auto IRHS = RHSExprs.begin();
5724     for (const Expr *E : ReductionOps) {
5725       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5726                                   cast<DeclRefExpr>(*IRHS));
5727       ++IPriv;
5728       ++ILHS;
5729       ++IRHS;
5730     }
5731     return;
5732   }
5733 
5734   // 1. Build a list of reduction variables.
5735   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5736   auto Size = RHSExprs.size();
5737   for (const Expr *E : Privates) {
5738     if (E->getType()->isVariablyModifiedType())
5739       // Reserve place for array size.
5740       ++Size;
5741   }
5742   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5743   QualType ReductionArrayTy =
5744       C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5745                              /*IndexTypeQuals=*/0);
5746   Address ReductionList =
5747       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5748   auto IPriv = Privates.begin();
5749   unsigned Idx = 0;
5750   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5751     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5752     CGF.Builder.CreateStore(
5753         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5754             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
5755         Elem);
5756     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5757       // Store array size.
5758       ++Idx;
5759       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5760       llvm::Value *Size = CGF.Builder.CreateIntCast(
5761           CGF.getVLASize(
5762                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5763               .NumElts,
5764           CGF.SizeTy, /*isSigned=*/false);
5765       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5766                               Elem);
5767     }
5768   }
5769 
5770   // 2. Emit reduce_func().
5771   llvm::Function *ReductionFn = emitReductionFunction(
5772       Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5773       LHSExprs, RHSExprs, ReductionOps);
5774 
5775   // 3. Create static kmp_critical_name lock = { 0 };
5776   std::string Name = getName({"reduction"});
5777   llvm::Value *Lock = getCriticalRegionLock(Name);
5778 
5779   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5780   // RedList, reduce_func, &<lock>);
5781   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5782   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5783   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5784   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5785       ReductionList.getPointer(), CGF.VoidPtrTy);
5786   llvm::Value *Args[] = {
5787       IdentTLoc,                             // ident_t *<loc>
5788       ThreadId,                              // i32 <gtid>
5789       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5790       ReductionArrayTySize,                  // size_type sizeof(RedList)
5791       RL,                                    // void *RedList
5792       ReductionFn, // void (*) (void *, void *) <reduce_func>
5793       Lock         // kmp_critical_name *&<lock>
5794   };
5795   llvm::Value *Res = CGF.EmitRuntimeCall(
5796       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
5797                                        : OMPRTL__kmpc_reduce),
5798       Args);
5799 
5800   // 5. Build switch(res)
5801   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5802   llvm::SwitchInst *SwInst =
5803       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5804 
5805   // 6. Build case 1:
5806   //  ...
5807   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5808   //  ...
5809   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5810   // break;
5811   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5812   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5813   CGF.EmitBlock(Case1BB);
5814 
5815   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5816   llvm::Value *EndArgs[] = {
5817       IdentTLoc, // ident_t *<loc>
5818       ThreadId,  // i32 <gtid>
5819       Lock       // kmp_critical_name *&<lock>
5820   };
5821   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5822                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5823     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5824     auto IPriv = Privates.begin();
5825     auto ILHS = LHSExprs.begin();
5826     auto IRHS = RHSExprs.begin();
5827     for (const Expr *E : ReductionOps) {
5828       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5829                                      cast<DeclRefExpr>(*IRHS));
5830       ++IPriv;
5831       ++ILHS;
5832       ++IRHS;
5833     }
5834   };
5835   RegionCodeGenTy RCG(CodeGen);
5836   CommonActionTy Action(
5837       nullptr, llvm::None,
5838       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
5839                                        : OMPRTL__kmpc_end_reduce),
5840       EndArgs);
5841   RCG.setAction(Action);
5842   RCG(CGF);
5843 
5844   CGF.EmitBranch(DefaultBB);
5845 
5846   // 7. Build case 2:
5847   //  ...
5848   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5849   //  ...
5850   // break;
5851   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5852   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5853   CGF.EmitBlock(Case2BB);
5854 
5855   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5856                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5857     auto ILHS = LHSExprs.begin();
5858     auto IRHS = RHSExprs.begin();
5859     auto IPriv = Privates.begin();
5860     for (const Expr *E : ReductionOps) {
5861       const Expr *XExpr = nullptr;
5862       const Expr *EExpr = nullptr;
5863       const Expr *UpExpr = nullptr;
5864       BinaryOperatorKind BO = BO_Comma;
5865       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5866         if (BO->getOpcode() == BO_Assign) {
5867           XExpr = BO->getLHS();
5868           UpExpr = BO->getRHS();
5869         }
5870       }
5871       // Try to emit update expression as a simple atomic.
5872       const Expr *RHSExpr = UpExpr;
5873       if (RHSExpr) {
5874         // Analyze RHS part of the whole expression.
5875         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5876                 RHSExpr->IgnoreParenImpCasts())) {
5877           // If this is a conditional operator, analyze its condition for
5878           // min/max reduction operator.
5879           RHSExpr = ACO->getCond();
5880         }
5881         if (const auto *BORHS =
5882                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5883           EExpr = BORHS->getRHS();
5884           BO = BORHS->getOpcode();
5885         }
5886       }
5887       if (XExpr) {
5888         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5889         auto &&AtomicRedGen = [BO, VD,
5890                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5891                                     const Expr *EExpr, const Expr *UpExpr) {
5892           LValue X = CGF.EmitLValue(XExpr);
5893           RValue E;
5894           if (EExpr)
5895             E = CGF.EmitAnyExpr(EExpr);
5896           CGF.EmitOMPAtomicSimpleUpdateExpr(
5897               X, E, BO, /*IsXLHSInRHSPart=*/true,
5898               llvm::AtomicOrdering::Monotonic, Loc,
5899               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5900                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5901                 PrivateScope.addPrivate(
5902                     VD, [&CGF, VD, XRValue, Loc]() {
5903                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5904                       CGF.emitOMPSimpleStore(
5905                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5906                           VD->getType().getNonReferenceType(), Loc);
5907                       return LHSTemp;
5908                     });
5909                 (void)PrivateScope.Privatize();
5910                 return CGF.EmitAnyExpr(UpExpr);
5911               });
5912         };
5913         if ((*IPriv)->getType()->isArrayType()) {
5914           // Emit atomic reduction for array section.
5915           const auto *RHSVar =
5916               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5917           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5918                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5919         } else {
5920           // Emit atomic reduction for array subscript or single variable.
5921           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5922         }
5923       } else {
5924         // Emit as a critical region.
5925         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5926                                            const Expr *, const Expr *) {
5927           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5928           std::string Name = RT.getName({"atomic_reduction"});
5929           RT.emitCriticalRegion(
5930               CGF, Name,
5931               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5932                 Action.Enter(CGF);
5933                 emitReductionCombiner(CGF, E);
5934               },
5935               Loc);
5936         };
5937         if ((*IPriv)->getType()->isArrayType()) {
5938           const auto *LHSVar =
5939               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5940           const auto *RHSVar =
5941               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5942           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5943                                     CritRedGen);
5944         } else {
5945           CritRedGen(CGF, nullptr, nullptr, nullptr);
5946         }
5947       }
5948       ++ILHS;
5949       ++IRHS;
5950       ++IPriv;
5951     }
5952   };
5953   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5954   if (!WithNowait) {
5955     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5956     llvm::Value *EndArgs[] = {
5957         IdentTLoc, // ident_t *<loc>
5958         ThreadId,  // i32 <gtid>
5959         Lock       // kmp_critical_name *&<lock>
5960     };
5961     CommonActionTy Action(nullptr, llvm::None,
5962                           createRuntimeFunction(OMPRTL__kmpc_end_reduce),
5963                           EndArgs);
5964     AtomicRCG.setAction(Action);
5965     AtomicRCG(CGF);
5966   } else {
5967     AtomicRCG(CGF);
5968   }
5969 
5970   CGF.EmitBranch(DefaultBB);
5971   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5972 }
5973 
5974 /// Generates unique name for artificial threadprivate variables.
5975 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5976 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5977                                       const Expr *Ref) {
5978   SmallString<256> Buffer;
5979   llvm::raw_svector_ostream Out(Buffer);
5980   const clang::DeclRefExpr *DE;
5981   const VarDecl *D = ::getBaseDecl(Ref, DE);
5982   if (!D)
5983     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5984   D = D->getCanonicalDecl();
5985   std::string Name = CGM.getOpenMPRuntime().getName(
5986       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5987   Out << Prefix << Name << "_"
5988       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5989   return Out.str();
5990 }
5991 
5992 /// Emits reduction initializer function:
5993 /// \code
5994 /// void @.red_init(void* %arg) {
5995 /// %0 = bitcast void* %arg to <type>*
5996 /// store <type> <init>, <type>* %0
5997 /// ret void
5998 /// }
5999 /// \endcode
6000 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
6001                                            SourceLocation Loc,
6002                                            ReductionCodeGen &RCG, unsigned N) {
6003   ASTContext &C = CGM.getContext();
6004   FunctionArgList Args;
6005   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6006                           ImplicitParamDecl::Other);
6007   Args.emplace_back(&Param);
6008   const auto &FnInfo =
6009       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6010   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6011   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
6012   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6013                                     Name, &CGM.getModule());
6014   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6015   Fn->setDoesNotRecurse();
6016   CodeGenFunction CGF(CGM);
6017   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6018   Address PrivateAddr = CGF.EmitLoadOfPointer(
6019       CGF.GetAddrOfLocalVar(&Param),
6020       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6021   llvm::Value *Size = nullptr;
6022   // If the size of the reduction item is non-constant, load it from global
6023   // threadprivate variable.
6024   if (RCG.getSizes(N).second) {
6025     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6026         CGF, CGM.getContext().getSizeType(),
6027         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6028     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6029                                 CGM.getContext().getSizeType(), Loc);
6030   }
6031   RCG.emitAggregateType(CGF, N, Size);
6032   LValue SharedLVal;
6033   // If initializer uses initializer from declare reduction construct, emit a
6034   // pointer to the address of the original reduction item (reuired by reduction
6035   // initializer)
6036   if (RCG.usesReductionInitializer(N)) {
6037     Address SharedAddr =
6038         CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6039             CGF, CGM.getContext().VoidPtrTy,
6040             generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6041     SharedAddr = CGF.EmitLoadOfPointer(
6042         SharedAddr,
6043         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
6044     SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
6045   } else {
6046     SharedLVal = CGF.MakeNaturalAlignAddrLValue(
6047         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
6048         CGM.getContext().VoidPtrTy);
6049   }
6050   // Emit the initializer:
6051   // %0 = bitcast void* %arg to <type>*
6052   // store <type> <init>, <type>* %0
6053   RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
6054                          [](CodeGenFunction &) { return false; });
6055   CGF.FinishFunction();
6056   return Fn;
6057 }
6058 
6059 /// Emits reduction combiner function:
6060 /// \code
6061 /// void @.red_comb(void* %arg0, void* %arg1) {
6062 /// %lhs = bitcast void* %arg0 to <type>*
6063 /// %rhs = bitcast void* %arg1 to <type>*
6064 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
6065 /// store <type> %2, <type>* %lhs
6066 /// ret void
6067 /// }
6068 /// \endcode
6069 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
6070                                            SourceLocation Loc,
6071                                            ReductionCodeGen &RCG, unsigned N,
6072                                            const Expr *ReductionOp,
6073                                            const Expr *LHS, const Expr *RHS,
6074                                            const Expr *PrivateRef) {
6075   ASTContext &C = CGM.getContext();
6076   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
6077   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
6078   FunctionArgList Args;
6079   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
6080                                C.VoidPtrTy, ImplicitParamDecl::Other);
6081   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6082                             ImplicitParamDecl::Other);
6083   Args.emplace_back(&ParamInOut);
6084   Args.emplace_back(&ParamIn);
6085   const auto &FnInfo =
6086       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6087   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6088   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
6089   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6090                                     Name, &CGM.getModule());
6091   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6092   Fn->setDoesNotRecurse();
6093   CodeGenFunction CGF(CGM);
6094   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6095   llvm::Value *Size = nullptr;
6096   // If the size of the reduction item is non-constant, load it from global
6097   // threadprivate variable.
6098   if (RCG.getSizes(N).second) {
6099     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6100         CGF, CGM.getContext().getSizeType(),
6101         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6102     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6103                                 CGM.getContext().getSizeType(), Loc);
6104   }
6105   RCG.emitAggregateType(CGF, N, Size);
6106   // Remap lhs and rhs variables to the addresses of the function arguments.
6107   // %lhs = bitcast void* %arg0 to <type>*
6108   // %rhs = bitcast void* %arg1 to <type>*
6109   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6110   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
6111     // Pull out the pointer to the variable.
6112     Address PtrAddr = CGF.EmitLoadOfPointer(
6113         CGF.GetAddrOfLocalVar(&ParamInOut),
6114         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6115     return CGF.Builder.CreateElementBitCast(
6116         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
6117   });
6118   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
6119     // Pull out the pointer to the variable.
6120     Address PtrAddr = CGF.EmitLoadOfPointer(
6121         CGF.GetAddrOfLocalVar(&ParamIn),
6122         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6123     return CGF.Builder.CreateElementBitCast(
6124         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
6125   });
6126   PrivateScope.Privatize();
6127   // Emit the combiner body:
6128   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6129   // store <type> %2, <type>* %lhs
6130   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6131       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6132       cast<DeclRefExpr>(RHS));
6133   CGF.FinishFunction();
6134   return Fn;
6135 }
6136 
6137 /// Emits reduction finalizer function:
6138 /// \code
6139 /// void @.red_fini(void* %arg) {
6140 /// %0 = bitcast void* %arg to <type>*
6141 /// <destroy>(<type>* %0)
6142 /// ret void
6143 /// }
6144 /// \endcode
6145 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6146                                            SourceLocation Loc,
6147                                            ReductionCodeGen &RCG, unsigned N) {
6148   if (!RCG.needCleanups(N))
6149     return nullptr;
6150   ASTContext &C = CGM.getContext();
6151   FunctionArgList Args;
6152   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6153                           ImplicitParamDecl::Other);
6154   Args.emplace_back(&Param);
6155   const auto &FnInfo =
6156       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6157   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6158   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6159   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6160                                     Name, &CGM.getModule());
6161   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6162   Fn->setDoesNotRecurse();
6163   CodeGenFunction CGF(CGM);
6164   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6165   Address PrivateAddr = CGF.EmitLoadOfPointer(
6166       CGF.GetAddrOfLocalVar(&Param),
6167       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6168   llvm::Value *Size = nullptr;
6169   // If the size of the reduction item is non-constant, load it from global
6170   // threadprivate variable.
6171   if (RCG.getSizes(N).second) {
6172     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6173         CGF, CGM.getContext().getSizeType(),
6174         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6175     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6176                                 CGM.getContext().getSizeType(), Loc);
6177   }
6178   RCG.emitAggregateType(CGF, N, Size);
6179   // Emit the finalizer body:
6180   // <destroy>(<type>* %0)
6181   RCG.emitCleanups(CGF, N, PrivateAddr);
6182   CGF.FinishFunction();
6183   return Fn;
6184 }
6185 
6186 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6187     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6188     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6189   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6190     return nullptr;
6191 
6192   // Build typedef struct:
6193   // kmp_task_red_input {
6194   //   void *reduce_shar; // shared reduction item
6195   //   size_t reduce_size; // size of data item
6196   //   void *reduce_init; // data initialization routine
6197   //   void *reduce_fini; // data finalization routine
6198   //   void *reduce_comb; // data combiner routine
6199   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
6200   // } kmp_task_red_input_t;
6201   ASTContext &C = CGM.getContext();
6202   RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
6203   RD->startDefinition();
6204   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6205   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6206   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6207   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6208   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6209   const FieldDecl *FlagsFD = addFieldToRecordDecl(
6210       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6211   RD->completeDefinition();
6212   QualType RDType = C.getRecordType(RD);
6213   unsigned Size = Data.ReductionVars.size();
6214   llvm::APInt ArraySize(/*numBits=*/64, Size);
6215   QualType ArrayRDType = C.getConstantArrayType(
6216       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
6217   // kmp_task_red_input_t .rd_input.[Size];
6218   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6219   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
6220                        Data.ReductionOps);
6221   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6222     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6223     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6224                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6225     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6226         TaskRedInput.getPointer(), Idxs,
6227         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6228         ".rd_input.gep.");
6229     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6230     // ElemLVal.reduce_shar = &Shareds[Cnt];
6231     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6232     RCG.emitSharedLValue(CGF, Cnt);
6233     llvm::Value *CastedShared =
6234         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
6235     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6236     RCG.emitAggregateType(CGF, Cnt);
6237     llvm::Value *SizeValInChars;
6238     llvm::Value *SizeVal;
6239     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6240     // We use delayed creation/initialization for VLAs, array sections and
6241     // custom reduction initializations. It is required because runtime does not
6242     // provide the way to pass the sizes of VLAs/array sections to
6243     // initializer/combiner/finalizer functions and does not pass the pointer to
6244     // original reduction item to the initializer. Instead threadprivate global
6245     // variables are used to store these values and use them in the functions.
6246     bool DelayedCreation = !!SizeVal;
6247     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6248                                                /*isSigned=*/false);
6249     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6250     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6251     // ElemLVal.reduce_init = init;
6252     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6253     llvm::Value *InitAddr =
6254         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6255     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6256     DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
6257     // ElemLVal.reduce_fini = fini;
6258     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6259     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6260     llvm::Value *FiniAddr = Fini
6261                                 ? CGF.EmitCastToVoidPtr(Fini)
6262                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6263     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6264     // ElemLVal.reduce_comb = comb;
6265     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6266     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6267         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6268         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6269     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6270     // ElemLVal.flags = 0;
6271     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6272     if (DelayedCreation) {
6273       CGF.EmitStoreOfScalar(
6274           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6275           FlagsLVal);
6276     } else
6277       CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
6278   }
6279   // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
6280   // *data);
6281   llvm::Value *Args[] = {
6282       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6283                                 /*isSigned=*/true),
6284       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6285       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6286                                                       CGM.VoidPtrTy)};
6287   return CGF.EmitRuntimeCall(
6288       createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
6289 }
6290 
6291 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6292                                               SourceLocation Loc,
6293                                               ReductionCodeGen &RCG,
6294                                               unsigned N) {
6295   auto Sizes = RCG.getSizes(N);
6296   // Emit threadprivate global variable if the type is non-constant
6297   // (Sizes.second = nullptr).
6298   if (Sizes.second) {
6299     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6300                                                      /*isSigned=*/false);
6301     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6302         CGF, CGM.getContext().getSizeType(),
6303         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6304     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6305   }
6306   // Store address of the original reduction item if custom initializer is used.
6307   if (RCG.usesReductionInitializer(N)) {
6308     Address SharedAddr = getAddrOfArtificialThreadPrivate(
6309         CGF, CGM.getContext().VoidPtrTy,
6310         generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6311     CGF.Builder.CreateStore(
6312         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6313             RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
6314         SharedAddr, /*IsVolatile=*/false);
6315   }
6316 }
6317 
6318 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6319                                               SourceLocation Loc,
6320                                               llvm::Value *ReductionsPtr,
6321                                               LValue SharedLVal) {
6322   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6323   // *d);
6324   llvm::Value *Args[] = {
6325       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6326                                 /*isSigned=*/true),
6327       ReductionsPtr,
6328       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
6329                                                       CGM.VoidPtrTy)};
6330   return Address(
6331       CGF.EmitRuntimeCall(
6332           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6333       SharedLVal.getAlignment());
6334 }
6335 
6336 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6337                                        SourceLocation Loc) {
6338   if (!CGF.HaveInsertPoint())
6339     return;
6340   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6341   // global_tid);
6342   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6343   // Ignore return result until untied tasks are supported.
6344   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6345   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6346     Region->emitUntiedSwitch(CGF);
6347 }
6348 
6349 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6350                                            OpenMPDirectiveKind InnerKind,
6351                                            const RegionCodeGenTy &CodeGen,
6352                                            bool HasCancel) {
6353   if (!CGF.HaveInsertPoint())
6354     return;
6355   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6356   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6357 }
6358 
namespace {
/// Cancellation kinds, passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint calls emitted in this file.
enum RTCancelKind {
  /// No specific cancellation kind.
  CancelNoreq = 0,
  /// Cancellation of a 'parallel' region.
  CancelParallel = 1,
  /// Cancellation of a 'for' region.
  CancelLoop = 2,
  /// Cancellation of a 'sections' region.
  CancelSections = 3,
  /// Cancellation of a 'taskgroup' region.
  CancelTaskgroup = 4
};
} // anonymous namespace
6368 
6369 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6370   RTCancelKind CancelKind = CancelNoreq;
6371   if (CancelRegion == OMPD_parallel)
6372     CancelKind = CancelParallel;
6373   else if (CancelRegion == OMPD_for)
6374     CancelKind = CancelLoop;
6375   else if (CancelRegion == OMPD_sections)
6376     CancelKind = CancelSections;
6377   else {
6378     assert(CancelRegion == OMPD_taskgroup);
6379     CancelKind = CancelTaskgroup;
6380   }
6381   return CancelKind;
6382 }
6383 
6384 void CGOpenMPRuntime::emitCancellationPointCall(
6385     CodeGenFunction &CGF, SourceLocation Loc,
6386     OpenMPDirectiveKind CancelRegion) {
6387   if (!CGF.HaveInsertPoint())
6388     return;
6389   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6390   // global_tid, kmp_int32 cncl_kind);
6391   if (auto *OMPRegionInfo =
6392           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6393     // For 'cancellation point taskgroup', the task region info may not have a
6394     // cancel. This may instead happen in another adjacent task.
6395     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6396       llvm::Value *Args[] = {
6397           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6398           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6399       // Ignore return result until untied tasks are supported.
6400       llvm::Value *Result = CGF.EmitRuntimeCall(
6401           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
6402       // if (__kmpc_cancellationpoint()) {
6403       //   exit from construct;
6404       // }
6405       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6406       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6407       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6408       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6409       CGF.EmitBlock(ExitBB);
6410       // exit from construct;
6411       CodeGenFunction::JumpDest CancelDest =
6412           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6413       CGF.EmitBranchThroughCleanup(CancelDest);
6414       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6415     }
6416   }
6417 }
6418 
6419 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6420                                      const Expr *IfCond,
6421                                      OpenMPDirectiveKind CancelRegion) {
6422   if (!CGF.HaveInsertPoint())
6423     return;
6424   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6425   // kmp_int32 cncl_kind);
6426   if (auto *OMPRegionInfo =
6427           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6428     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
6429                                                         PrePostActionTy &) {
6430       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6431       llvm::Value *Args[] = {
6432           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6433           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6434       // Ignore return result until untied tasks are supported.
6435       llvm::Value *Result = CGF.EmitRuntimeCall(
6436           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
6437       // if (__kmpc_cancel()) {
6438       //   exit from construct;
6439       // }
6440       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6441       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6442       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6443       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6444       CGF.EmitBlock(ExitBB);
6445       // exit from construct;
6446       CodeGenFunction::JumpDest CancelDest =
6447           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6448       CGF.EmitBranchThroughCleanup(CancelDest);
6449       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6450     };
6451     if (IfCond) {
6452       emitOMPIfClause(CGF, IfCond, ThenGen,
6453                       [](CodeGenFunction &, PrePostActionTy &) {});
6454     } else {
6455       RegionCodeGenTy ThenRCG(ThenGen);
6456       ThenRCG(CGF);
6457     }
6458   }
6459 }
6460 
6461 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6462     const OMPExecutableDirective &D, StringRef ParentName,
6463     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6464     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6465   assert(!ParentName.empty() && "Invalid target region parent name!");
6466   HasEmittedTargetRegion = true;
6467   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6468                                    IsOffloadEntry, CodeGen);
6469 }
6470 
6471 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6472     const OMPExecutableDirective &D, StringRef ParentName,
6473     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6474     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6475   // Create a unique name for the entry function using the source location
6476   // information of the current target region. The name will be something like:
6477   //
6478   // __omp_offloading_DD_FFFF_PP_lBB
6479   //
6480   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6481   // mangled name of the function that encloses the target region and BB is the
6482   // line number of the target region.
6483 
6484   unsigned DeviceID;
6485   unsigned FileID;
6486   unsigned Line;
6487   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6488                            Line);
6489   SmallString<64> EntryFnName;
6490   {
6491     llvm::raw_svector_ostream OS(EntryFnName);
6492     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6493        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6494   }
6495 
6496   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6497 
6498   CodeGenFunction CGF(CGM, true);
6499   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6500   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6501 
6502   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
6503 
6504   // If this target outline function is not an offload entry, we don't need to
6505   // register it.
6506   if (!IsOffloadEntry)
6507     return;
6508 
6509   // The target region ID is used by the runtime library to identify the current
6510   // target region, so it only has to be unique and not necessarily point to
6511   // anything. It could be the pointer to the outlined function that implements
6512   // the target region, but we aren't using that so that the compiler doesn't
6513   // need to keep that, and could therefore inline the host function if proven
6514   // worthwhile during optimization. In the other hand, if emitting code for the
6515   // device, the ID has to be the function address so that it can retrieved from
6516   // the offloading entry and launched by the runtime library. We also mark the
6517   // outlined function to have external linkage in case we are emitting code for
6518   // the device, because these functions will be entry points to the device.
6519 
6520   if (CGM.getLangOpts().OpenMPIsDevice) {
6521     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6522     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6523     OutlinedFn->setDSOLocal(false);
6524   } else {
6525     std::string Name = getName({EntryFnName, "region_id"});
6526     OutlinedFnID = new llvm::GlobalVariable(
6527         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6528         llvm::GlobalValue::WeakAnyLinkage,
6529         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6530   }
6531 
6532   // Register the information for the entry associated with this target region.
6533   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6534       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6535       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6536 }
6537 
6538 /// Checks if the expression is constant or does not have non-trivial function
6539 /// calls.
6540 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6541   // We can skip constant expressions.
6542   // We can skip expressions with trivial calls or simple expressions.
6543   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6544           !E->hasNonTrivialCall(Ctx)) &&
6545          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6546 }
6547 
6548 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6549                                                     const Stmt *Body) {
6550   const Stmt *Child = Body->IgnoreContainers();
6551   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6552     Child = nullptr;
6553     for (const Stmt *S : C->body()) {
6554       if (const auto *E = dyn_cast<Expr>(S)) {
6555         if (isTrivial(Ctx, E))
6556           continue;
6557       }
6558       // Some of the statements can be ignored.
6559       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6560           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6561         continue;
6562       // Analyze declarations.
6563       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6564         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6565               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6566                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6567                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6568                   isa<UsingDirectiveDecl>(D) ||
6569                   isa<OMPDeclareReductionDecl>(D) ||
6570                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6571                 return true;
6572               const auto *VD = dyn_cast<VarDecl>(D);
6573               if (!VD)
6574                 return false;
6575               return VD->isConstexpr() ||
6576                      ((VD->getType().isTrivialType(Ctx) ||
6577                        VD->getType()->isReferenceType()) &&
6578                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6579             }))
6580           continue;
6581       }
6582       // Found multiple children - cannot get the one child only.
6583       if (Child)
6584         return nullptr;
6585       Child = S;
6586     }
6587     if (Child)
6588       Child = Child->IgnoreContainers();
6589   }
6590   return Child;
6591 }
6592 
6593 /// Emit the number of teams for a target directive.  Inspect the num_teams
6594 /// clause associated with a teams construct combined or closely nested
6595 /// with the target directive.
6596 ///
6597 /// Emit a team of size one for directives such as 'target parallel' that
6598 /// have no associated teams construct.
6599 ///
6600 /// Otherwise, return nullptr.
6601 static llvm::Value *
6602 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6603                                const OMPExecutableDirective &D) {
6604   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6605          "Clauses associated with the teams directive expected to be emitted "
6606          "only for the host!");
6607   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6608   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6609          "Expected target-based executable directive.");
6610   CGBuilderTy &Bld = CGF.Builder;
6611   switch (DirectiveKind) {
6612   case OMPD_target: {
6613     const auto *CS = D.getInnermostCapturedStmt();
6614     const auto *Body =
6615         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6616     const Stmt *ChildStmt =
6617         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6618     if (const auto *NestedDir =
6619             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6620       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6621         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6622           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6623           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6624           const Expr *NumTeams =
6625               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6626           llvm::Value *NumTeamsVal =
6627               CGF.EmitScalarExpr(NumTeams,
6628                                  /*IgnoreResultAssign*/ true);
6629           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6630                                    /*isSigned=*/true);
6631         }
6632         return Bld.getInt32(0);
6633       }
6634       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6635           isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6636         return Bld.getInt32(1);
6637       return Bld.getInt32(0);
6638     }
6639     return nullptr;
6640   }
6641   case OMPD_target_teams:
6642   case OMPD_target_teams_distribute:
6643   case OMPD_target_teams_distribute_simd:
6644   case OMPD_target_teams_distribute_parallel_for:
6645   case OMPD_target_teams_distribute_parallel_for_simd: {
6646     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6647       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6648       const Expr *NumTeams =
6649           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6650       llvm::Value *NumTeamsVal =
6651           CGF.EmitScalarExpr(NumTeams,
6652                              /*IgnoreResultAssign*/ true);
6653       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6654                                /*isSigned=*/true);
6655     }
6656     return Bld.getInt32(0);
6657   }
6658   case OMPD_target_parallel:
6659   case OMPD_target_parallel_for:
6660   case OMPD_target_parallel_for_simd:
6661   case OMPD_target_simd:
6662     return Bld.getInt32(1);
6663   case OMPD_parallel:
6664   case OMPD_for:
6665   case OMPD_parallel_for:
6666   case OMPD_parallel_sections:
6667   case OMPD_for_simd:
6668   case OMPD_parallel_for_simd:
6669   case OMPD_cancel:
6670   case OMPD_cancellation_point:
6671   case OMPD_ordered:
6672   case OMPD_threadprivate:
6673   case OMPD_allocate:
6674   case OMPD_task:
6675   case OMPD_simd:
6676   case OMPD_sections:
6677   case OMPD_section:
6678   case OMPD_single:
6679   case OMPD_master:
6680   case OMPD_critical:
6681   case OMPD_taskyield:
6682   case OMPD_barrier:
6683   case OMPD_taskwait:
6684   case OMPD_taskgroup:
6685   case OMPD_atomic:
6686   case OMPD_flush:
6687   case OMPD_teams:
6688   case OMPD_target_data:
6689   case OMPD_target_exit_data:
6690   case OMPD_target_enter_data:
6691   case OMPD_distribute:
6692   case OMPD_distribute_simd:
6693   case OMPD_distribute_parallel_for:
6694   case OMPD_distribute_parallel_for_simd:
6695   case OMPD_teams_distribute:
6696   case OMPD_teams_distribute_simd:
6697   case OMPD_teams_distribute_parallel_for:
6698   case OMPD_teams_distribute_parallel_for_simd:
6699   case OMPD_target_update:
6700   case OMPD_declare_simd:
6701   case OMPD_declare_variant:
6702   case OMPD_declare_target:
6703   case OMPD_end_declare_target:
6704   case OMPD_declare_reduction:
6705   case OMPD_declare_mapper:
6706   case OMPD_taskloop:
6707   case OMPD_taskloop_simd:
6708   case OMPD_master_taskloop:
6709   case OMPD_parallel_master_taskloop:
6710   case OMPD_requires:
6711   case OMPD_unknown:
6712     break;
6713   }
6714   llvm_unreachable("Unexpected directive kind.");
6715 }
6716 
6717 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6718                                   llvm::Value *DefaultThreadLimitVal) {
6719   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6720       CGF.getContext(), CS->getCapturedStmt());
6721   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6722     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6723       llvm::Value *NumThreads = nullptr;
6724       llvm::Value *CondVal = nullptr;
6725       // Handle if clause. If if clause present, the number of threads is
6726       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6727       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6728         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6729         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6730         const OMPIfClause *IfClause = nullptr;
6731         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6732           if (C->getNameModifier() == OMPD_unknown ||
6733               C->getNameModifier() == OMPD_parallel) {
6734             IfClause = C;
6735             break;
6736           }
6737         }
6738         if (IfClause) {
6739           const Expr *Cond = IfClause->getCondition();
6740           bool Result;
6741           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6742             if (!Result)
6743               return CGF.Builder.getInt32(1);
6744           } else {
6745             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6746             if (const auto *PreInit =
6747                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6748               for (const auto *I : PreInit->decls()) {
6749                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6750                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6751                 } else {
6752                   CodeGenFunction::AutoVarEmission Emission =
6753                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6754                   CGF.EmitAutoVarCleanups(Emission);
6755                 }
6756               }
6757             }
6758             CondVal = CGF.EvaluateExprAsBool(Cond);
6759           }
6760         }
6761       }
6762       // Check the value of num_threads clause iff if clause was not specified
6763       // or is not evaluated to false.
6764       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6765         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6766         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6767         const auto *NumThreadsClause =
6768             Dir->getSingleClause<OMPNumThreadsClause>();
6769         CodeGenFunction::LexicalScope Scope(
6770             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6771         if (const auto *PreInit =
6772                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6773           for (const auto *I : PreInit->decls()) {
6774             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6775               CGF.EmitVarDecl(cast<VarDecl>(*I));
6776             } else {
6777               CodeGenFunction::AutoVarEmission Emission =
6778                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6779               CGF.EmitAutoVarCleanups(Emission);
6780             }
6781           }
6782         }
6783         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6784         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6785                                                /*isSigned=*/false);
6786         if (DefaultThreadLimitVal)
6787           NumThreads = CGF.Builder.CreateSelect(
6788               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6789               DefaultThreadLimitVal, NumThreads);
6790       } else {
6791         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6792                                            : CGF.Builder.getInt32(0);
6793       }
6794       // Process condition of the if clause.
6795       if (CondVal) {
6796         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6797                                               CGF.Builder.getInt32(1));
6798       }
6799       return NumThreads;
6800     }
6801     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6802       return CGF.Builder.getInt32(1);
6803     return DefaultThreadLimitVal;
6804   }
6805   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6806                                : CGF.Builder.getInt32(0);
6807 }
6808 
6809 /// Emit the number of threads for a target directive.  Inspect the
6810 /// thread_limit clause associated with a teams construct combined or closely
6811 /// nested with the target directive.
6812 ///
6813 /// Emit the num_threads clause for directives such as 'target parallel' that
6814 /// have no associated teams construct.
6815 ///
6816 /// Otherwise, return nullptr.
6817 static llvm::Value *
6818 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
6819                                  const OMPExecutableDirective &D) {
6820   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6821          "Clauses associated with the teams directive expected to be emitted "
6822          "only for the host!");
6823   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6824   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6825          "Expected target-based executable directive.");
6826   CGBuilderTy &Bld = CGF.Builder;
6827   llvm::Value *ThreadLimitVal = nullptr;
6828   llvm::Value *NumThreadsVal = nullptr;
6829   switch (DirectiveKind) {
6830   case OMPD_target: {
6831     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6832     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6833       return NumThreads;
6834     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6835         CGF.getContext(), CS->getCapturedStmt());
6836     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6837       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6838         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6839         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6840         const auto *ThreadLimitClause =
6841             Dir->getSingleClause<OMPThreadLimitClause>();
6842         CodeGenFunction::LexicalScope Scope(
6843             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6844         if (const auto *PreInit =
6845                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6846           for (const auto *I : PreInit->decls()) {
6847             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6848               CGF.EmitVarDecl(cast<VarDecl>(*I));
6849             } else {
6850               CodeGenFunction::AutoVarEmission Emission =
6851                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6852               CGF.EmitAutoVarCleanups(Emission);
6853             }
6854           }
6855         }
6856         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6857             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6858         ThreadLimitVal =
6859             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6860       }
6861       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6862           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6863         CS = Dir->getInnermostCapturedStmt();
6864         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6865             CGF.getContext(), CS->getCapturedStmt());
6866         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6867       }
6868       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6869           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6870         CS = Dir->getInnermostCapturedStmt();
6871         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6872           return NumThreads;
6873       }
6874       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6875         return Bld.getInt32(1);
6876     }
6877     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6878   }
6879   case OMPD_target_teams: {
6880     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6881       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6882       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6883       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6884           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6885       ThreadLimitVal =
6886           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6887     }
6888     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6889     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6890       return NumThreads;
6891     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6892         CGF.getContext(), CS->getCapturedStmt());
6893     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6894       if (Dir->getDirectiveKind() == OMPD_distribute) {
6895         CS = Dir->getInnermostCapturedStmt();
6896         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6897           return NumThreads;
6898       }
6899     }
6900     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6901   }
6902   case OMPD_target_teams_distribute:
6903     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6904       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6905       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6906       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6907           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6908       ThreadLimitVal =
6909           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6910     }
6911     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6912   case OMPD_target_parallel:
6913   case OMPD_target_parallel_for:
6914   case OMPD_target_parallel_for_simd:
6915   case OMPD_target_teams_distribute_parallel_for:
6916   case OMPD_target_teams_distribute_parallel_for_simd: {
6917     llvm::Value *CondVal = nullptr;
6918     // Handle if clause. If if clause present, the number of threads is
6919     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6920     if (D.hasClausesOfKind<OMPIfClause>()) {
6921       const OMPIfClause *IfClause = nullptr;
6922       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6923         if (C->getNameModifier() == OMPD_unknown ||
6924             C->getNameModifier() == OMPD_parallel) {
6925           IfClause = C;
6926           break;
6927         }
6928       }
6929       if (IfClause) {
6930         const Expr *Cond = IfClause->getCondition();
6931         bool Result;
6932         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6933           if (!Result)
6934             return Bld.getInt32(1);
6935         } else {
6936           CodeGenFunction::RunCleanupsScope Scope(CGF);
6937           CondVal = CGF.EvaluateExprAsBool(Cond);
6938         }
6939       }
6940     }
6941     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6942       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6943       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6944       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6945           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6946       ThreadLimitVal =
6947           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6948     }
6949     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6950       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6951       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6952       llvm::Value *NumThreads = CGF.EmitScalarExpr(
6953           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6954       NumThreadsVal =
6955           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
6956       ThreadLimitVal = ThreadLimitVal
6957                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6958                                                                 ThreadLimitVal),
6959                                               NumThreadsVal, ThreadLimitVal)
6960                            : NumThreadsVal;
6961     }
6962     if (!ThreadLimitVal)
6963       ThreadLimitVal = Bld.getInt32(0);
6964     if (CondVal)
6965       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6966     return ThreadLimitVal;
6967   }
6968   case OMPD_target_teams_distribute_simd:
6969   case OMPD_target_simd:
6970     return Bld.getInt32(1);
6971   case OMPD_parallel:
6972   case OMPD_for:
6973   case OMPD_parallel_for:
6974   case OMPD_parallel_sections:
6975   case OMPD_for_simd:
6976   case OMPD_parallel_for_simd:
6977   case OMPD_cancel:
6978   case OMPD_cancellation_point:
6979   case OMPD_ordered:
6980   case OMPD_threadprivate:
6981   case OMPD_allocate:
6982   case OMPD_task:
6983   case OMPD_simd:
6984   case OMPD_sections:
6985   case OMPD_section:
6986   case OMPD_single:
6987   case OMPD_master:
6988   case OMPD_critical:
6989   case OMPD_taskyield:
6990   case OMPD_barrier:
6991   case OMPD_taskwait:
6992   case OMPD_taskgroup:
6993   case OMPD_atomic:
6994   case OMPD_flush:
6995   case OMPD_teams:
6996   case OMPD_target_data:
6997   case OMPD_target_exit_data:
6998   case OMPD_target_enter_data:
6999   case OMPD_distribute:
7000   case OMPD_distribute_simd:
7001   case OMPD_distribute_parallel_for:
7002   case OMPD_distribute_parallel_for_simd:
7003   case OMPD_teams_distribute:
7004   case OMPD_teams_distribute_simd:
7005   case OMPD_teams_distribute_parallel_for:
7006   case OMPD_teams_distribute_parallel_for_simd:
7007   case OMPD_target_update:
7008   case OMPD_declare_simd:
7009   case OMPD_declare_variant:
7010   case OMPD_declare_target:
7011   case OMPD_end_declare_target:
7012   case OMPD_declare_reduction:
7013   case OMPD_declare_mapper:
7014   case OMPD_taskloop:
7015   case OMPD_taskloop_simd:
7016   case OMPD_master_taskloop:
7017   case OMPD_parallel_master_taskloop:
7018   case OMPD_requires:
7019   case OMPD_unknown:
7020     break;
7021   }
7022   llvm_unreachable("Unsupported directive kind.");
7023 }
7024 
7025 namespace {
7026 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7027 
7028 // Utility to handle information from clauses associated with a given
7029 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7030 // It provides a convenient interface to obtain the information and generate
7031 // code for that information.
7032 class MappableExprsHandler {
7033 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. The flags are combined with bitwise operators (the enum is
  /// marked as a bitmask enum below).
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags.
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// The map was generated implicitly rather than written by the user.
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is a member of
    /// some struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7074 
7075   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7076   static unsigned getFlagMemberOffset() {
7077     unsigned Offset = 0;
7078     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7079          Remain = Remain >> 1)
7080       Offset++;
7081     return Offset;
7082   }
7083 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library. It pairs the pointer value with an optional declaration
  /// describing the device pointer it stands for.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    /// Construct from a base pointer and, optionally, the associated device
    /// pointer declaration. The constructor is not explicit, so a plain
    /// llvm::Value pointer converts implicitly to a BasePointerInfo.
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Return the wrapped base pointer.
    llvm::Value *operator*() const { return Ptr; }
    /// Return the associated device pointer declaration, or null.
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    /// Replace the associated device pointer declaration.
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
7100 
  /// List of base pointers, each with its optional device pointer
  /// declaration.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// List of plain IR values (used in this class for section pointers and
  /// sizes).
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// List of map type flag sets.
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7104 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Field index and address of the lowest mapped element. The address is
    /// invalid until an element has been recorded.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest mapped element. The address is
    /// invalid until an element has been recorded.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// NOTE(review): presumably the address of the struct the elements belong
    /// to; confirm against the code that fills this in. Invalid by default.
    Address Base = Address::invalid();
  };
7116 
7117 private:
  /// Information recorded for a single list of mappable-expression
  /// components: the components themselves, the map type and modifiers to
  /// apply, and two flags describing how the entry has to be handled.
  struct MapInfo {
    /// The expression components this entry describes.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// The map type (to, from, tofrom, ...); unknown by default.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// The map type modifiers. This is a non-owning ArrayRef, so the
    /// referenced storage has to outlive this object.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// NOTE(review): presumably set when the device pointer has to be returned
    /// for this entry (see OMP_MAP_RETURN_PARAM / use_device_ptr); confirm
    /// against the users of this field.
    bool ReturnDevicePointer = false;
    /// True if the map was generated implicitly.
    bool IsImplicit = false;

    MapInfo() = default;
    /// Construct an entry with all fields given explicitly.
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7135 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed. This struct records one such
  /// deferred entry.
  struct DeferredDevicePtrEntryTy {
    /// NOTE(review): presumably the expression that refers to the pointer
    /// member; confirm against the code that creates these entries.
    const Expr *IE = nullptr;
    /// NOTE(review): presumably the declaration of the pointer named in the
    /// use_device_ptr clause; confirm against the code that creates these
    /// entries.
    const ValueDecl *VD = nullptr;

    /// Record the expression and declaration of a deferred entry.
    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7146 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for. Held by reference, so the
  /// CodeGenFunction has to outlive this handler.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7167 
7168   llvm::Value *getExprTypeSize(const Expr *E) const {
7169     QualType ExprTy = E->getType().getCanonicalType();
7170 
7171     // Reference types are ignored for mapping purposes.
7172     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7173       ExprTy = RefTy->getPointeeType().getCanonicalType();
7174 
7175     // Given that an array section is considered a built-in type, we need to
7176     // do the calculation based on the length of the section instead of relying
7177     // on CGF.getTypeSize(E->getType()).
7178     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7179       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7180                             OAE->getBase()->IgnoreParenImpCasts())
7181                             .getCanonicalType();
7182 
7183       // If there is no length associated with the expression and lower bound is
7184       // not specified too, that means we are using the whole length of the
7185       // base.
7186       if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
7187           !OAE->getLowerBound())
7188         return CGF.getTypeSize(BaseTy);
7189 
7190       llvm::Value *ElemSize;
7191       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7192         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7193       } else {
7194         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7195         assert(ATy && "Expecting array type if not a pointer type.");
7196         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7197       }
7198 
7199       // If we don't have a length at this point, that is because we have an
7200       // array section with a single element.
7201       if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
7202         return ElemSize;
7203 
7204       if (const Expr *LenExpr = OAE->getLength()) {
7205         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7206         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7207                                              CGF.getContext().getSizeType(),
7208                                              LenExpr->getExprLoc());
7209         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7210       }
7211       assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
7212              OAE->getLowerBound() && "expected array_section[lb:].");
7213       // Size = sizetype - lb * elemtype;
7214       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7215       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7216       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7217                                        CGF.getContext().getSizeType(),
7218                                        OAE->getLowerBound()->getExprLoc());
7219       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7220       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7221       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7222       LengthVal = CGF.Builder.CreateSelect(
7223           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7224       return LengthVal;
7225     }
7226     return CGF.getTypeSize(ExprTy);
7227   }
7228 
7229   /// Return the corresponding bits for a given map clause modifier. Add
7230   /// a flag marking the map as a pointer if requested. Add a flag marking the
7231   /// map as the first one of a series of maps that relate to the same map
7232   /// expression.
7233   OpenMPOffloadMappingFlags getMapTypeBits(
7234       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7235       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7236     OpenMPOffloadMappingFlags Bits =
7237         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7238     switch (MapType) {
7239     case OMPC_MAP_alloc:
7240     case OMPC_MAP_release:
7241       // alloc and release is the default behavior in the runtime library,  i.e.
7242       // if we don't pass any bits alloc/release that is what the runtime is
7243       // going to do. Therefore, we don't need to signal anything for these two
7244       // type modifiers.
7245       break;
7246     case OMPC_MAP_to:
7247       Bits |= OMP_MAP_TO;
7248       break;
7249     case OMPC_MAP_from:
7250       Bits |= OMP_MAP_FROM;
7251       break;
7252     case OMPC_MAP_tofrom:
7253       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7254       break;
7255     case OMPC_MAP_delete:
7256       Bits |= OMP_MAP_DELETE;
7257       break;
7258     case OMPC_MAP_unknown:
7259       llvm_unreachable("Unexpected map type!");
7260     }
7261     if (AddPtrFlag)
7262       Bits |= OMP_MAP_PTR_AND_OBJ;
7263     if (AddIsTargetParamFlag)
7264       Bits |= OMP_MAP_TARGET_PARAM;
7265     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7266         != MapModifiers.end())
7267       Bits |= OMP_MAP_ALWAYS;
7268     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7269         != MapModifiers.end())
7270       Bits |= OMP_MAP_CLOSE;
7271     return Bits;
7272   }
7273 
7274   /// Return true if the provided expression is a final array section. A
7275   /// final array section, is one whose length can't be proved to be one.
7276   bool isFinalArraySectionExpression(const Expr *E) const {
7277     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7278 
7279     // It is not an array section and therefore not a unity-size one.
7280     if (!OASE)
7281       return false;
7282 
7283     // An array section with no colon always refer to a single element.
7284     if (OASE->getColonLoc().isInvalid())
7285       return false;
7286 
7287     const Expr *Length = OASE->getLength();
7288 
7289     // If we don't have a length we have to check if the array has size 1
7290     // for this dimension. Also, we should always expect a length if the
7291     // base type is pointer.
7292     if (!Length) {
7293       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7294                              OASE->getBase()->IgnoreParenImpCasts())
7295                              .getCanonicalType();
7296       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7297         return ATy->getSize().getSExtValue() != 1;
7298       // If we don't have a constant dimension length, we have to consider
7299       // the current section as having any size, so it is not necessarily
7300       // unitary. If it happen to be unity size, that's user fault.
7301       return true;
7302     }
7303 
7304     // Check if the length evaluates to 1.
7305     Expr::EvalResult Result;
7306     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7307       return true; // Can have more that size 1.
7308 
7309     llvm::APSInt ConstLength = Result.Val.getInt();
7310     return ConstLength.getSExtValue() != 1;
7311   }
7312 
7313   /// Generate the base pointers, section pointers, sizes and map type
7314   /// bits for the provided map type, map modifier, and expression components.
7315   /// \a IsFirstComponent should be set to true if the provided set of
7316   /// components is the first associated with a capture.
7317   void generateInfoForComponentList(
7318       OpenMPMapClauseKind MapType,
7319       ArrayRef<OpenMPMapModifierKind> MapModifiers,
7320       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7321       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7322       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7323       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7324       bool IsImplicit,
7325       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7326           OverlappedElements = llvm::None) const {
7327     // The following summarizes what has to be generated for each map and the
7328     // types below. The generated information is expressed in this order:
7329     // base pointer, section pointer, size, flags
7330     // (to add to the ones that come from the map type and modifier).
7331     //
7332     // double d;
7333     // int i[100];
7334     // float *p;
7335     //
7336     // struct S1 {
7337     //   int i;
7338     //   float f[50];
7339     // }
7340     // struct S2 {
7341     //   int i;
7342     //   float f[50];
7343     //   S1 s;
7344     //   double *p;
7345     //   struct S2 *ps;
7346     // }
7347     // S2 s;
7348     // S2 *ps;
7349     //
7350     // map(d)
7351     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7352     //
7353     // map(i)
7354     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7355     //
7356     // map(i[1:23])
7357     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7358     //
7359     // map(p)
7360     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7361     //
7362     // map(p[1:24])
7363     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7364     //
7365     // map(s)
7366     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7367     //
7368     // map(s.i)
7369     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7370     //
7371     // map(s.s.f)
7372     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7373     //
7374     // map(s.p)
7375     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7376     //
7377     // map(to: s.p[:22])
7378     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7379     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7380     // &(s.p), &(s.p[0]), 22*sizeof(double),
7381     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7382     // (*) alloc space for struct members, only this is a target parameter
7383     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7384     //      optimizes this entry out, same in the examples below)
7385     // (***) map the pointee (map: to)
7386     //
7387     // map(s.ps)
7388     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7389     //
7390     // map(from: s.ps->s.i)
7391     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7392     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7393     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7394     //
7395     // map(to: s.ps->ps)
7396     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7397     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7398     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7399     //
7400     // map(s.ps->ps->ps)
7401     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7402     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7403     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7404     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7405     //
7406     // map(to: s.ps->ps->s.f[:22])
7407     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7408     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7409     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7410     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7411     //
7412     // map(ps)
7413     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7414     //
7415     // map(ps->i)
7416     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7417     //
7418     // map(ps->s.f)
7419     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7420     //
7421     // map(from: ps->p)
7422     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7423     //
7424     // map(to: ps->p[:22])
7425     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7426     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7427     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7428     //
7429     // map(ps->ps)
7430     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7431     //
7432     // map(from: ps->ps->s.i)
7433     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7434     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7435     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7436     //
7437     // map(from: ps->ps->ps)
7438     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7439     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7440     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7441     //
7442     // map(ps->ps->ps->ps)
7443     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7444     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7445     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7446     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7447     //
7448     // map(to: ps->ps->ps->s.f[:22])
7449     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7450     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7451     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7452     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7453     //
7454     // map(to: s.f[:22]) map(from: s.p[:33])
7455     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7456     //     sizeof(double*) (**), TARGET_PARAM
7457     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7458     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7459     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7460     // (*) allocate contiguous space needed to fit all mapped members even if
7461     //     we allocate space for members not mapped (in this example,
7462     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7463     //     them as well because they fall between &s.f[0] and &s.p)
7464     //
7465     // map(from: s.f[:22]) map(to: ps->p[:33])
7466     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7467     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7468     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7469     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7470     // (*) the struct this entry pertains to is the 2nd element in the list of
7471     //     arguments, hence MEMBER_OF(2)
7472     //
7473     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7474     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7475     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7476     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7477     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7478     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7479     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7480     // (*) the struct this entry pertains to is the 4th element in the list
7481     //     of arguments, hence MEMBER_OF(4)
7482 
7483     // Track if the map information being generated is the first for a capture.
7484     bool IsCaptureFirstInfo = IsFirstComponentList;
7485     // When the variable is on a declare target link or in a to clause with
7486     // unified memory, a reference is needed to hold the host/device address
7487     // of the variable.
7488     bool RequiresReference = false;
7489 
7490     // Scan the components from the base to the complete expression.
7491     auto CI = Components.rbegin();
7492     auto CE = Components.rend();
7493     auto I = CI;
7494 
7495     // Track if the map information being generated is the first for a list of
7496     // components.
7497     bool IsExpressionFirstInfo = true;
7498     Address BP = Address::invalid();
7499     const Expr *AssocExpr = I->getAssociatedExpression();
7500     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7501     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7502 
7503     if (isa<MemberExpr>(AssocExpr)) {
7504       // The base is the 'this' pointer. The content of the pointer is going
7505       // to be the base of the field being mapped.
7506       BP = CGF.LoadCXXThisAddress();
7507     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7508                (OASE &&
7509                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7510       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7511     } else {
7512       // The base is the reference to the variable.
7513       // BP = &Var.
7514       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7515       if (const auto *VD =
7516               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7517         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7518                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7519           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7520               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7521                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7522             RequiresReference = true;
7523             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7524           }
7525         }
7526       }
7527 
7528       // If the variable is a pointer and is being dereferenced (i.e. is not
7529       // the last component), the base has to be the pointer itself, not its
7530       // reference. References are ignored for mapping purposes.
7531       QualType Ty =
7532           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7533       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7534         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7535 
7536         // We do not need to generate individual map information for the
7537         // pointer, it can be associated with the combined storage.
7538         ++I;
7539       }
7540     }
7541 
7542     // Track whether a component of the list should be marked as MEMBER_OF some
7543     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7544     // in a component list should be marked as MEMBER_OF, all subsequent entries
7545     // do not belong to the base struct. E.g.
7546     // struct S2 s;
7547     // s.ps->ps->ps->f[:]
7548     //   (1) (2) (3) (4)
7549     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7550     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7551     // is the pointee of ps(2) which is not member of struct s, so it should not
7552     // be marked as such (it is still PTR_AND_OBJ).
7553     // The variable is initialized to false so that PTR_AND_OBJ entries which
7554     // are not struct members are not considered (e.g. array of pointers to
7555     // data).
7556     bool ShouldBeMemberOf = false;
7557 
7558     // Variable keeping track of whether or not we have encountered a component
7559     // in the component list which is a member expression. Useful when we have a
7560     // pointer or a final array section, in which case it is the previous
7561     // component in the list which tells us whether we have a member expression.
7562     // E.g. X.f[:]
7563     // While processing the final array section "[:]" it is "f" which tells us
7564     // whether we are dealing with a member of a declared struct.
7565     const MemberExpr *EncounteredME = nullptr;
7566 
7567     for (; I != CE; ++I) {
7568       // If the current component is member of a struct (parent struct) mark it.
7569       if (!EncounteredME) {
7570         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7571         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7572         // as MEMBER_OF the parent struct.
7573         if (EncounteredME)
7574           ShouldBeMemberOf = true;
7575       }
7576 
7577       auto Next = std::next(I);
7578 
7579       // We need to generate the addresses and sizes if this is the last
7580       // component, if the component is a pointer or if it is an array section
7581       // whose length can't be proved to be one. If this is a pointer, it
7582       // becomes the base address for the following components.
7583 
7584       // A final array section, is one whose length can't be proved to be one.
7585       bool IsFinalArraySection =
7586           isFinalArraySectionExpression(I->getAssociatedExpression());
7587 
7588       // Get information on whether the element is a pointer. Have to do a
7589       // special treatment for array sections given that they are built-in
7590       // types.
7591       const auto *OASE =
7592           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7593       bool IsPointer =
7594           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7595                        .getCanonicalType()
7596                        ->isAnyPointerType()) ||
7597           I->getAssociatedExpression()->getType()->isAnyPointerType();
7598 
7599       if (Next == CE || IsPointer || IsFinalArraySection) {
7600         // If this is not the last component, we expect the pointer to be
7601         // associated with an array expression or member expression.
7602         assert((Next == CE ||
7603                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7604                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7605                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
7606                "Unexpected expression");
7607 
7608         Address LB =
7609             CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
7610 
7611         // If this component is a pointer inside the base struct then we don't
7612         // need to create any entry for it - it will be combined with the object
7613         // it is pointing to into a single PTR_AND_OBJ entry.
7614         bool IsMemberPointer =
7615             IsPointer && EncounteredME &&
7616             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7617              EncounteredME);
7618         if (!OverlappedElements.empty()) {
7619           // Handle base element with the info for overlapped elements.
7620           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7621           assert(Next == CE &&
7622                  "Expected last element for the overlapped elements.");
7623           assert(!IsPointer &&
7624                  "Unexpected base element with the pointer type.");
7625           // Mark the whole struct as the struct that requires allocation on the
7626           // device.
7627           PartialStruct.LowestElem = {0, LB};
7628           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7629               I->getAssociatedExpression()->getType());
7630           Address HB = CGF.Builder.CreateConstGEP(
7631               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7632                                                               CGF.VoidPtrTy),
7633               TypeSize.getQuantity() - 1);
7634           PartialStruct.HighestElem = {
7635               std::numeric_limits<decltype(
7636                   PartialStruct.HighestElem.first)>::max(),
7637               HB};
7638           PartialStruct.Base = BP;
7639           // Emit data for non-overlapped data.
7640           OpenMPOffloadMappingFlags Flags =
7641               OMP_MAP_MEMBER_OF |
7642               getMapTypeBits(MapType, MapModifiers, IsImplicit,
7643                              /*AddPtrFlag=*/false,
7644                              /*AddIsTargetParamFlag=*/false);
7645           LB = BP;
7646           llvm::Value *Size = nullptr;
7647           // Do bitcopy of all non-overlapped structure elements.
7648           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7649                    Component : OverlappedElements) {
7650             Address ComponentLB = Address::invalid();
7651             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7652                  Component) {
7653               if (MC.getAssociatedDeclaration()) {
7654                 ComponentLB =
7655                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7656                         .getAddress();
7657                 Size = CGF.Builder.CreatePtrDiff(
7658                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7659                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7660                 break;
7661               }
7662             }
7663             BasePointers.push_back(BP.getPointer());
7664             Pointers.push_back(LB.getPointer());
7665             Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
7666                                                       /*isSigned=*/true));
7667             Types.push_back(Flags);
7668             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7669           }
7670           BasePointers.push_back(BP.getPointer());
7671           Pointers.push_back(LB.getPointer());
7672           Size = CGF.Builder.CreatePtrDiff(
7673               CGF.EmitCastToVoidPtr(
7674                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7675               CGF.EmitCastToVoidPtr(LB.getPointer()));
7676           Sizes.push_back(
7677               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7678           Types.push_back(Flags);
7679           break;
7680         }
7681         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7682         if (!IsMemberPointer) {
7683           BasePointers.push_back(BP.getPointer());
7684           Pointers.push_back(LB.getPointer());
7685           Sizes.push_back(
7686               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7687 
7688           // We need to add a pointer flag for each map that comes from the
7689           // same expression except for the first one. We also need to signal
7690           // this map is the first one that relates with the current capture
7691           // (there is a set of entries for each capture).
7692           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7693               MapType, MapModifiers, IsImplicit,
7694               !IsExpressionFirstInfo || RequiresReference,
7695               IsCaptureFirstInfo && !RequiresReference);
7696 
7697           if (!IsExpressionFirstInfo) {
7698             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7699             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7700             if (IsPointer)
7701               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7702                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7703 
7704             if (ShouldBeMemberOf) {
7705               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7706               // should be later updated with the correct value of MEMBER_OF.
7707               Flags |= OMP_MAP_MEMBER_OF;
7708               // From now on, all subsequent PTR_AND_OBJ entries should not be
7709               // marked as MEMBER_OF.
7710               ShouldBeMemberOf = false;
7711             }
7712           }
7713 
7714           Types.push_back(Flags);
7715         }
7716 
7717         // If we have encountered a member expression so far, keep track of the
7718         // mapped member. If the parent is "*this", then the value declaration
7719         // is nullptr.
7720         if (EncounteredME) {
7721           const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
7722           unsigned FieldIndex = FD->getFieldIndex();
7723 
7724           // Update info about the lowest and highest elements for this struct
7725           if (!PartialStruct.Base.isValid()) {
7726             PartialStruct.LowestElem = {FieldIndex, LB};
7727             PartialStruct.HighestElem = {FieldIndex, LB};
7728             PartialStruct.Base = BP;
7729           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7730             PartialStruct.LowestElem = {FieldIndex, LB};
7731           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7732             PartialStruct.HighestElem = {FieldIndex, LB};
7733           }
7734         }
7735 
7736         // If we have a final array section, we are done with this expression.
7737         if (IsFinalArraySection)
7738           break;
7739 
7740         // The pointer becomes the base for the next element.
7741         if (Next != CE)
7742           BP = LB;
7743 
7744         IsExpressionFirstInfo = false;
7745         IsCaptureFirstInfo = false;
7746       }
7747     }
7748   }
7749 
7750   /// Return the adjusted map modifiers if the declaration a capture refers to
7751   /// appears in a first-private clause. This is expected to be used only with
7752   /// directives that start with 'target'.
7753   MappableExprsHandler::OpenMPOffloadMappingFlags
7754   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7755     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7756 
7757     // A first private variable captured by reference will use only the
7758     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7759     // declaration is known as first-private in this handler.
7760     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7761       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7762           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7763         return MappableExprsHandler::OMP_MAP_ALWAYS |
7764                MappableExprsHandler::OMP_MAP_TO;
7765       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7766         return MappableExprsHandler::OMP_MAP_TO |
7767                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7768       return MappableExprsHandler::OMP_MAP_PRIVATE |
7769              MappableExprsHandler::OMP_MAP_TO;
7770     }
7771     return MappableExprsHandler::OMP_MAP_TO |
7772            MappableExprsHandler::OMP_MAP_FROM;
7773   }
7774 
7775   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7776     // Rotate by getFlagMemberOffset() bits.
7777     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7778                                                   << getFlagMemberOffset());
7779   }
7780 
7781   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7782                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7783     // If the entry is PTR_AND_OBJ but has not been marked with the special
7784     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7785     // marked as MEMBER_OF.
7786     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7787         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7788       return;
7789 
7790     // Reset the placeholder value to prepare the flag for the assignment of the
7791     // proper MEMBER_OF value.
7792     Flags &= ~OMP_MAP_MEMBER_OF;
7793     Flags |= MemberOfFlag;
7794   }
7795 
7796   void getPlainLayout(const CXXRecordDecl *RD,
7797                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7798                       bool AsBase) const {
7799     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7800 
7801     llvm::StructType *St =
7802         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7803 
7804     unsigned NumElements = St->getNumElements();
7805     llvm::SmallVector<
7806         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7807         RecordLayout(NumElements);
7808 
7809     // Fill bases.
7810     for (const auto &I : RD->bases()) {
7811       if (I.isVirtual())
7812         continue;
7813       const auto *Base = I.getType()->getAsCXXRecordDecl();
7814       // Ignore empty bases.
7815       if (Base->isEmpty() || CGF.getContext()
7816                                  .getASTRecordLayout(Base)
7817                                  .getNonVirtualSize()
7818                                  .isZero())
7819         continue;
7820 
7821       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7822       RecordLayout[FieldIndex] = Base;
7823     }
7824     // Fill in virtual bases.
7825     for (const auto &I : RD->vbases()) {
7826       const auto *Base = I.getType()->getAsCXXRecordDecl();
7827       // Ignore empty bases.
7828       if (Base->isEmpty())
7829         continue;
7830       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7831       if (RecordLayout[FieldIndex])
7832         continue;
7833       RecordLayout[FieldIndex] = Base;
7834     }
7835     // Fill in all the fields.
7836     assert(!RD->isUnion() && "Unexpected union.");
7837     for (const auto *Field : RD->fields()) {
7838       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7839       // will fill in later.)
7840       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7841         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7842         RecordLayout[FieldIndex] = Field;
7843       }
7844     }
7845     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7846              &Data : RecordLayout) {
7847       if (Data.isNull())
7848         continue;
7849       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7850         getPlainLayout(Base, Layout, /*AsBase=*/true);
7851       else
7852         Layout.push_back(Data.get<const FieldDecl *>());
7853     }
7854   }
7855 
7856 public:
7857   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7858       : CurDir(&Dir), CGF(CGF) {
7859     // Extract firstprivate clause information.
7860     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7861       for (const auto *D : C->varlists())
7862         FirstPrivateDecls.try_emplace(
7863             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7864     // Extract device pointer clause information.
7865     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7866       for (auto L : C->component_lists())
7867         DevPointersMap[L.first].push_back(L.second);
7868   }
7869 
  /// Constructor for the declare mapper directive. Only records \a Dir as the
  /// current directive and keeps a reference to \a CGF; the body is empty, so
  /// no clause information is collected here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
7873 
7874   /// Generate code for the combined entry if we have a partially mapped struct
7875   /// and take care of the mapping flags of the arguments corresponding to
7876   /// individual struct members.
7877   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7878                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7879                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7880                          const StructRangeInfoTy &PartialStruct) const {
7881     // Base is the base of the struct
7882     BasePointers.push_back(PartialStruct.Base.getPointer());
7883     // Pointer is the address of the lowest element
7884     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7885     Pointers.push_back(LB);
7886     // Size is (addr of {highest+1} element) - (addr of lowest element)
7887     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7888     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7889     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7890     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7891     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7892     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7893                                                   /*isSigned=*/false);
7894     Sizes.push_back(Size);
7895     // Map type is always TARGET_PARAM
7896     Types.push_back(OMP_MAP_TARGET_PARAM);
7897     // Remove TARGET_PARAM flag from the first element
7898     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7899 
7900     // All other current entries will be MEMBER_OF the combined entry
7901     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7902     // 0xFFFF in the MEMBER_OF field).
7903     OpenMPOffloadMappingFlags MemberOfFlag =
7904         getMemberOfFlag(BasePointers.size() - 1);
7905     for (auto &M : CurTypes)
7906       setCorrectMemberOfFlag(M, MemberOfFlag);
7907   }
7908 
7909   /// Generate all the base pointers, section pointers, sizes and map
7910   /// types for the extracted mappable expressions. Also, for each item that
7911   /// relates with a device pointer, a pair of the relevant declaration and
7912   /// index where it occurs is appended to the device pointers info array.
7913   void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7914                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7915                        MapFlagsArrayTy &Types) const {
7916     // We have to process the component lists that relate with the same
7917     // declaration in a single chunk so that we can generate the map flags
7918     // correctly. Therefore, we organize all lists in a map.
7919     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7920 
7921     // Helper function to fill the information map for the different supported
7922     // clauses.
7923     auto &&InfoGen = [&Info](
7924         const ValueDecl *D,
7925         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7926         OpenMPMapClauseKind MapType,
7927         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7928         bool ReturnDevicePointer, bool IsImplicit) {
7929       const ValueDecl *VD =
7930           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7931       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7932                             IsImplicit);
7933     };
7934 
7935     assert(CurDir.is<const OMPExecutableDirective *>() &&
7936            "Expect a executable directive");
7937     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
7938     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
7939       for (const auto &L : C->component_lists()) {
7940         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7941             /*ReturnDevicePointer=*/false, C->isImplicit());
7942       }
7943     for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
7944       for (const auto &L : C->component_lists()) {
7945         InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7946             /*ReturnDevicePointer=*/false, C->isImplicit());
7947       }
7948     for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
7949       for (const auto &L : C->component_lists()) {
7950         InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7951             /*ReturnDevicePointer=*/false, C->isImplicit());
7952       }
7953 
    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
7960     llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7961         DeferredInfo;
7962 
7963     for (const auto *C :
7964          CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
7965       for (const auto &L : C->component_lists()) {
7966         assert(!L.second.empty() && "Not expecting empty list of components!");
7967         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
7968         VD = cast<ValueDecl>(VD->getCanonicalDecl());
7969         const Expr *IE = L.second.back().getAssociatedExpression();
7970         // If the first component is a member expression, we have to look into
7971         // 'this', which maps to null in the map of map information. Otherwise
7972         // look directly for the information.
7973         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7974 
7975         // We potentially have map information for this declaration already.
7976         // Look for the first set of components that refer to it.
7977         if (It != Info.end()) {
7978           auto CI = std::find_if(
7979               It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
7980                 return MI.Components.back().getAssociatedDeclaration() == VD;
7981               });
7982           // If we found a map entry, signal that the pointer has to be returned
7983           // and move on to the next declaration.
7984           if (CI != It->second.end()) {
7985             CI->ReturnDevicePointer = true;
7986             continue;
7987           }
7988         }
7989 
7990         // We didn't find any match in our map information - generate a zero
7991         // size array section - if the pointer is a struct member we defer this
7992         // action until the whole struct has been processed.
7993         if (isa<MemberExpr>(IE)) {
7994           // Insert the pointer into Info to be processed by
7995           // generateInfoForComponentList. Because it is a member pointer
7996           // without a pointee, no entry will be generated for it, therefore
7997           // we need to generate one after the whole struct has been processed.
7998           // Nonetheless, generateInfoForComponentList must be called to take
7999           // the pointer into account for the calculation of the range of the
8000           // partial struct.
8001           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8002                   /*ReturnDevicePointer=*/false, C->isImplicit());
8003           DeferredInfo[nullptr].emplace_back(IE, VD);
8004         } else {
8005           llvm::Value *Ptr =
8006               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8007           BasePointers.emplace_back(Ptr, VD);
8008           Pointers.push_back(Ptr);
8009           Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8010           Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8011         }
8012       }
8013     }
8014 
8015     for (const auto &M : Info) {
8016       // We need to know when we generate information for the first component
8017       // associated with a capture, because the mapping flags depend on it.
8018       bool IsFirstComponentList = true;
8019 
8020       // Temporary versions of arrays
8021       MapBaseValuesArrayTy CurBasePointers;
8022       MapValuesArrayTy CurPointers;
8023       MapValuesArrayTy CurSizes;
8024       MapFlagsArrayTy CurTypes;
8025       StructRangeInfoTy PartialStruct;
8026 
8027       for (const MapInfo &L : M.second) {
8028         assert(!L.Components.empty() &&
8029                "Not expecting declaration with no component lists.");
8030 
8031         // Remember the current base pointer index.
8032         unsigned CurrentBasePointersIdx = CurBasePointers.size();
8033         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8034                                      CurBasePointers, CurPointers, CurSizes,
8035                                      CurTypes, PartialStruct,
8036                                      IsFirstComponentList, L.IsImplicit);
8037 
8038         // If this entry relates with a device pointer, set the relevant
8039         // declaration and add the 'return pointer' flag.
8040         if (L.ReturnDevicePointer) {
8041           assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8042                  "Unexpected number of mapped base pointers.");
8043 
8044           const ValueDecl *RelevantVD =
8045               L.Components.back().getAssociatedDeclaration();
8046           assert(RelevantVD &&
8047                  "No relevant declaration related with device pointer??");
8048 
8049           CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8050           CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8051         }
8052         IsFirstComponentList = false;
8053       }
8054 
8055       // Append any pending zero-length pointers which are struct members and
8056       // used with use_device_ptr.
8057       auto CI = DeferredInfo.find(M.first);
8058       if (CI != DeferredInfo.end()) {
8059         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8060           llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
8061           llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
8062               this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
8063           CurBasePointers.emplace_back(BasePtr, L.VD);
8064           CurPointers.push_back(Ptr);
8065           CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8066           // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8067           // value MEMBER_OF=FFFF so that the entry is later updated with the
8068           // correct value of MEMBER_OF.
8069           CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8070                              OMP_MAP_MEMBER_OF);
8071         }
8072       }
8073 
8074       // If there is an entry in PartialStruct it means we have a struct with
8075       // individual members mapped. Emit an extra combined entry.
8076       if (PartialStruct.Base.isValid())
8077         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8078                           PartialStruct);
8079 
8080       // We need to append the results of this capture to what we already have.
8081       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8082       Pointers.append(CurPointers.begin(), CurPointers.end());
8083       Sizes.append(CurSizes.begin(), CurSizes.end());
8084       Types.append(CurTypes.begin(), CurTypes.end());
8085     }
8086   }
8087 
8088   /// Generate all the base pointers, section pointers, sizes and map types for
8089   /// the extracted map clauses of user-defined mapper.
8090   void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8091                                 MapValuesArrayTy &Pointers,
8092                                 MapValuesArrayTy &Sizes,
8093                                 MapFlagsArrayTy &Types) const {
8094     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8095            "Expect a declare mapper directive");
8096     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8097     // We have to process the component lists that relate with the same
8098     // declaration in a single chunk so that we can generate the map flags
8099     // correctly. Therefore, we organize all lists in a map.
8100     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8101 
8102     // Helper function to fill the information map for the different supported
8103     // clauses.
8104     auto &&InfoGen = [&Info](
8105         const ValueDecl *D,
8106         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8107         OpenMPMapClauseKind MapType,
8108         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8109         bool ReturnDevicePointer, bool IsImplicit) {
8110       const ValueDecl *VD =
8111           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8112       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8113                             IsImplicit);
8114     };
8115 
8116     for (const auto *C : CurMapperDir->clauselists()) {
8117       const auto *MC = cast<OMPMapClause>(C);
8118       for (const auto &L : MC->component_lists()) {
8119         InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8120                 /*ReturnDevicePointer=*/false, MC->isImplicit());
8121       }
8122     }
8123 
8124     for (const auto &M : Info) {
8125       // We need to know when we generate information for the first component
8126       // associated with a capture, because the mapping flags depend on it.
8127       bool IsFirstComponentList = true;
8128 
8129       // Temporary versions of arrays
8130       MapBaseValuesArrayTy CurBasePointers;
8131       MapValuesArrayTy CurPointers;
8132       MapValuesArrayTy CurSizes;
8133       MapFlagsArrayTy CurTypes;
8134       StructRangeInfoTy PartialStruct;
8135 
8136       for (const MapInfo &L : M.second) {
8137         assert(!L.Components.empty() &&
8138                "Not expecting declaration with no component lists.");
8139         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8140                                      CurBasePointers, CurPointers, CurSizes,
8141                                      CurTypes, PartialStruct,
8142                                      IsFirstComponentList, L.IsImplicit);
8143         IsFirstComponentList = false;
8144       }
8145 
8146       // If there is an entry in PartialStruct it means we have a struct with
8147       // individual members mapped. Emit an extra combined entry.
8148       if (PartialStruct.Base.isValid())
8149         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8150                           PartialStruct);
8151 
8152       // We need to append the results of this capture to what we already have.
8153       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8154       Pointers.append(CurPointers.begin(), CurPointers.end());
8155       Sizes.append(CurSizes.begin(), CurSizes.end());
8156       Types.append(CurTypes.begin(), CurTypes.end());
8157     }
8158   }
8159 
8160   /// Emit capture info for lambdas for variables captured by reference.
8161   void generateInfoForLambdaCaptures(
8162       const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8163       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8164       MapFlagsArrayTy &Types,
8165       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8166     const auto *RD = VD->getType()
8167                          .getCanonicalType()
8168                          .getNonReferenceType()
8169                          ->getAsCXXRecordDecl();
8170     if (!RD || !RD->isLambda())
8171       return;
8172     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8173     LValue VDLVal = CGF.MakeAddrLValue(
8174         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8175     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8176     FieldDecl *ThisCapture = nullptr;
8177     RD->getCaptureFields(Captures, ThisCapture);
8178     if (ThisCapture) {
8179       LValue ThisLVal =
8180           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8181       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8182       LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
8183       BasePointers.push_back(ThisLVal.getPointer());
8184       Pointers.push_back(ThisLValVal.getPointer());
8185       Sizes.push_back(
8186           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8187                                     CGF.Int64Ty, /*isSigned=*/true));
8188       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8189                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8190     }
8191     for (const LambdaCapture &LC : RD->captures()) {
8192       if (!LC.capturesVariable())
8193         continue;
8194       const VarDecl *VD = LC.getCapturedVar();
8195       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8196         continue;
8197       auto It = Captures.find(VD);
8198       assert(It != Captures.end() && "Found lambda capture without field.");
8199       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8200       if (LC.getCaptureKind() == LCK_ByRef) {
8201         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8202         LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
8203         BasePointers.push_back(VarLVal.getPointer());
8204         Pointers.push_back(VarLValVal.getPointer());
8205         Sizes.push_back(CGF.Builder.CreateIntCast(
8206             CGF.getTypeSize(
8207                 VD->getType().getCanonicalType().getNonReferenceType()),
8208             CGF.Int64Ty, /*isSigned=*/true));
8209       } else {
8210         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8211         LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
8212         BasePointers.push_back(VarLVal.getPointer());
8213         Pointers.push_back(VarRVal.getScalarVal());
8214         Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8215       }
8216       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8217                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8218     }
8219   }
8220 
8221   /// Set correct indices for lambdas captures.
8222   void adjustMemberOfForLambdaCaptures(
8223       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8224       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8225       MapFlagsArrayTy &Types) const {
8226     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8227       // Set correct member_of idx for all implicit lambda captures.
8228       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8229                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8230         continue;
8231       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8232       assert(BasePtr && "Unable to find base lambda address.");
8233       int TgtIdx = -1;
8234       for (unsigned J = I; J > 0; --J) {
8235         unsigned Idx = J - 1;
8236         if (Pointers[Idx] != BasePtr)
8237           continue;
8238         TgtIdx = Idx;
8239         break;
8240       }
8241       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8242       // All other current entries will be MEMBER_OF the combined entry
8243       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8244       // 0xFFFF in the MEMBER_OF field).
8245       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8246       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8247     }
8248   }
8249 
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  ///
  /// If the captured declaration is found in DevPointersMap, a single entry
  /// (\p Arg as base and pointer, pointer size, LITERAL | TARGET_PARAM) is
  /// appended and nothing else is done. Otherwise the component lists of all
  /// map clauses of the current executable directive that refer to the capture
  /// are collected. Lists that are overlapped by other lists are emitted first,
  /// together with the lists overlapping them; the remaining lists follow.
  ///
  /// \param Cap The capture to generate map information for; must not be a
  /// variable array type capture.
  /// \param Arg Value passed for the capture; only used for the device pointer
  /// case.
  /// \param PartialStruct Forwarded to generateInfoForComponentList.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // A captured 'this' has no declaration and is represented by a null VD;
    // any other capture is identified by its canonical declaration.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // One record per component list: (components, map type, map modifiers,
    // is-implicit).
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Gather every component list of every map clause that refers to VD.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // The map is keyed by the address of the base list's record inside
    // DeclComponentLists; the value holds the lists that overlap that base.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      // Compare L against every list that comes after it, so each unordered
      // pair is examined once.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // MapType, MapModifiers and IsImplicit are overwritten here, but only
        // the component lists are read in this loop.
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        // Walk both lists from the back in lock step until the components
        // differ or one of the lists is exhausted.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The exhausted list is the base; the other list is recorded as
          // overlapping it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // Layout is the field order of the record type of VD; the comparator
    // below consults it for fields that have different parents.
    // NOTE(review): VD is null for a captured 'this' and is dereferenced here
    // whenever overlaps were found; presumably overlaps cannot be recorded for
    // 'this' - confirm.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the components both lists have in common, starting from
            // the back.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // The lists diverge at a field: order by field index when both
            // fields have the same parent, otherwise by whichever field is
            // found first in Layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // NOTE(review): It is dereferenced without an end() check, which
            // assumes FD1 or FD2 is always present in Layout - confirm.
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Go through all of the elements with the overlapped elements. Each base
    // list is emitted with IsFirstComponentList set, along with the lists
    // that overlap it.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    // The first-list flag starts out set only when nothing was emitted above,
    // and it is cleared after the first list is visited, whether or not that
    // list was emitted by this loop.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      // Lists that served as a base above have already been emitted.
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
8427 
8428   /// Generate the base pointers, section pointers, sizes and map types
8429   /// associated with the declare target link variables.
8430   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8431                                         MapValuesArrayTy &Pointers,
8432                                         MapValuesArrayTy &Sizes,
8433                                         MapFlagsArrayTy &Types) const {
8434     assert(CurDir.is<const OMPExecutableDirective *>() &&
8435            "Expect a executable directive");
8436     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8437     // Map other list items in the map clause which are not captured variables
8438     // but "declare target link" global variables.
8439     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8440       for (const auto &L : C->component_lists()) {
8441         if (!L.first)
8442           continue;
8443         const auto *VD = dyn_cast<VarDecl>(L.first);
8444         if (!VD)
8445           continue;
8446         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8447             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8448         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8449             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8450           continue;
8451         StructRangeInfoTy PartialStruct;
8452         generateInfoForComponentList(
8453             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8454             Pointers, Sizes, Types, PartialStruct,
8455             /*IsFirstComponentList=*/true, C->isImplicit());
8456         assert(!PartialStruct.Base.isValid() &&
8457                "No partial structs for declare target link expected.");
8458       }
8459     }
8460   }
8461 
8462   /// Generate the default map information for a given capture \a CI,
8463   /// record field declaration \a RI and captured value \a CV.
8464   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8465                               const FieldDecl &RI, llvm::Value *CV,
8466                               MapBaseValuesArrayTy &CurBasePointers,
8467                               MapValuesArrayTy &CurPointers,
8468                               MapValuesArrayTy &CurSizes,
8469                               MapFlagsArrayTy &CurMapTypes) const {
8470     bool IsImplicit = true;
8471     // Do the default mapping.
8472     if (CI.capturesThis()) {
8473       CurBasePointers.push_back(CV);
8474       CurPointers.push_back(CV);
8475       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8476       CurSizes.push_back(
8477           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8478                                     CGF.Int64Ty, /*isSigned=*/true));
8479       // Default map type.
8480       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8481     } else if (CI.capturesVariableByCopy()) {
8482       CurBasePointers.push_back(CV);
8483       CurPointers.push_back(CV);
8484       if (!RI.getType()->isAnyPointerType()) {
8485         // We have to signal to the runtime captures passed by value that are
8486         // not pointers.
8487         CurMapTypes.push_back(OMP_MAP_LITERAL);
8488         CurSizes.push_back(CGF.Builder.CreateIntCast(
8489             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8490       } else {
8491         // Pointers are implicitly mapped with a zero size and no flags
8492         // (other than first map that is added for all implicit maps).
8493         CurMapTypes.push_back(OMP_MAP_NONE);
8494         CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8495       }
8496       const VarDecl *VD = CI.getCapturedVar();
8497       auto I = FirstPrivateDecls.find(VD);
8498       if (I != FirstPrivateDecls.end())
8499         IsImplicit = I->getSecond();
8500     } else {
8501       assert(CI.capturesVariable() && "Expected captured reference.");
8502       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8503       QualType ElementType = PtrTy->getPointeeType();
8504       CurSizes.push_back(CGF.Builder.CreateIntCast(
8505           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8506       // The default map type for a scalar/complex type is 'to' because by
8507       // default the value doesn't have to be retrieved. For an aggregate
8508       // type, the default is 'tofrom'.
8509       CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
8510       const VarDecl *VD = CI.getCapturedVar();
8511       auto I = FirstPrivateDecls.find(VD);
8512       if (I != FirstPrivateDecls.end() &&
8513           VD->getType().isConstant(CGF.getContext())) {
8514         llvm::Constant *Addr =
8515             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8516         // Copy the value of the original variable to the new global copy.
8517         CGF.Builder.CreateMemCpy(
8518             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
8519             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8520             CurSizes.back(), /*IsVolatile=*/false);
8521         // Use new global variable as the base pointers.
8522         CurBasePointers.push_back(Addr);
8523         CurPointers.push_back(Addr);
8524       } else {
8525         CurBasePointers.push_back(CV);
8526         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8527           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8528               CV, ElementType, CGF.getContext().getDeclAlign(VD),
8529               AlignmentSource::Decl));
8530           CurPointers.push_back(PtrAddr.getPointer());
8531         } else {
8532           CurPointers.push_back(CV);
8533         }
8534       }
8535       if (I != FirstPrivateDecls.end())
8536         IsImplicit = I->getSecond();
8537     }
8538     // Every default map produces a single argument which is a target parameter.
8539     CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
8540 
8541     // Add flag stating this is an implicit map.
8542     if (IsImplicit)
8543       CurMapTypes.back() |= OMP_MAP_IMPLICIT;
8544   }
8545 };
8546 } // anonymous namespace
8547 
8548 /// Emit the arrays used to pass the captures and map information to the
8549 /// offloading runtime library. If there is no map or capture information,
8550 /// return nullptr by reference.
8551 static void
8552 emitOffloadingArrays(CodeGenFunction &CGF,
8553                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
8554                      MappableExprsHandler::MapValuesArrayTy &Pointers,
8555                      MappableExprsHandler::MapValuesArrayTy &Sizes,
8556                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
8557                      CGOpenMPRuntime::TargetDataInfo &Info) {
8558   CodeGenModule &CGM = CGF.CGM;
8559   ASTContext &Ctx = CGF.getContext();
8560 
8561   // Reset the array information.
8562   Info.clearArrayInfo();
8563   Info.NumberOfPtrs = BasePointers.size();
8564 
8565   if (Info.NumberOfPtrs) {
8566     // Detect if we have any capture size requiring runtime evaluation of the
8567     // size so that a constant array could be eventually used.
8568     bool hasRuntimeEvaluationCaptureSize = false;
8569     for (llvm::Value *S : Sizes)
8570       if (!isa<llvm::Constant>(S)) {
8571         hasRuntimeEvaluationCaptureSize = true;
8572         break;
8573       }
8574 
8575     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8576     QualType PointerArrayType = Ctx.getConstantArrayType(
8577         Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
8578         /*IndexTypeQuals=*/0);
8579 
8580     Info.BasePointersArray =
8581         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8582     Info.PointersArray =
8583         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8584 
8585     // If we don't have any VLA types or other types that require runtime
8586     // evaluation, we can use a constant array for the map sizes, otherwise we
8587     // need to fill up the arrays as we do for the pointers.
8588     QualType Int64Ty =
8589         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8590     if (hasRuntimeEvaluationCaptureSize) {
8591       QualType SizeArrayType = Ctx.getConstantArrayType(
8592           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
8593           /*IndexTypeQuals=*/0);
8594       Info.SizesArray =
8595           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8596     } else {
8597       // We expect all the sizes to be constant, so we collect them to create
8598       // a constant array.
8599       SmallVector<llvm::Constant *, 16> ConstSizes;
8600       for (llvm::Value *S : Sizes)
8601         ConstSizes.push_back(cast<llvm::Constant>(S));
8602 
8603       auto *SizesArrayInit = llvm::ConstantArray::get(
8604           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8605       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8606       auto *SizesArrayGbl = new llvm::GlobalVariable(
8607           CGM.getModule(), SizesArrayInit->getType(),
8608           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8609           SizesArrayInit, Name);
8610       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8611       Info.SizesArray = SizesArrayGbl;
8612     }
8613 
8614     // The map types are always constant so we don't need to generate code to
8615     // fill arrays. Instead, we create an array constant.
8616     SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8617     llvm::copy(MapTypes, Mapping.begin());
8618     llvm::Constant *MapTypesArrayInit =
8619         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8620     std::string MaptypesName =
8621         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8622     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8623         CGM.getModule(), MapTypesArrayInit->getType(),
8624         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8625         MapTypesArrayInit, MaptypesName);
8626     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8627     Info.MapTypesArray = MapTypesArrayGbl;
8628 
8629     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8630       llvm::Value *BPVal = *BasePointers[I];
8631       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8632           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8633           Info.BasePointersArray, 0, I);
8634       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8635           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8636       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8637       CGF.Builder.CreateStore(BPVal, BPAddr);
8638 
8639       if (Info.requiresDevicePointerInfo())
8640         if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8641           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8642 
8643       llvm::Value *PVal = Pointers[I];
8644       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8645           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8646           Info.PointersArray, 0, I);
8647       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8648           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8649       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8650       CGF.Builder.CreateStore(PVal, PAddr);
8651 
8652       if (hasRuntimeEvaluationCaptureSize) {
8653         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8654             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8655             Info.SizesArray,
8656             /*Idx0=*/0,
8657             /*Idx1=*/I);
8658         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8659         CGF.Builder.CreateStore(
8660             CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8661             SAddr);
8662       }
8663     }
8664   }
8665 }
8666 
8667 /// Emit the arguments to be passed to the runtime library based on the
8668 /// arrays of pointers, sizes and map types.
8669 static void emitOffloadingArraysArgument(
8670     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8671     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8672     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8673   CodeGenModule &CGM = CGF.CGM;
8674   if (Info.NumberOfPtrs) {
8675     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8676         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8677         Info.BasePointersArray,
8678         /*Idx0=*/0, /*Idx1=*/0);
8679     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8680         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8681         Info.PointersArray,
8682         /*Idx0=*/0,
8683         /*Idx1=*/0);
8684     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8685         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8686         /*Idx0=*/0, /*Idx1=*/0);
8687     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8688         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8689         Info.MapTypesArray,
8690         /*Idx0=*/0,
8691         /*Idx1=*/0);
8692   } else {
8693     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8694     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8695     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8696     MapTypesArrayArg =
8697         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8698   }
8699 }
8700 
8701 /// Check for inner distribute directive.
8702 static const OMPExecutableDirective *
8703 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8704   const auto *CS = D.getInnermostCapturedStmt();
8705   const auto *Body =
8706       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8707   const Stmt *ChildStmt =
8708       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8709 
8710   if (const auto *NestedDir =
8711           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8712     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8713     switch (D.getDirectiveKind()) {
8714     case OMPD_target:
8715       if (isOpenMPDistributeDirective(DKind))
8716         return NestedDir;
8717       if (DKind == OMPD_teams) {
8718         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8719             /*IgnoreCaptured=*/true);
8720         if (!Body)
8721           return nullptr;
8722         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8723         if (const auto *NND =
8724                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8725           DKind = NND->getDirectiveKind();
8726           if (isOpenMPDistributeDirective(DKind))
8727             return NND;
8728         }
8729       }
8730       return nullptr;
8731     case OMPD_target_teams:
8732       if (isOpenMPDistributeDirective(DKind))
8733         return NestedDir;
8734       return nullptr;
8735     case OMPD_target_parallel:
8736     case OMPD_target_simd:
8737     case OMPD_target_parallel_for:
8738     case OMPD_target_parallel_for_simd:
8739       return nullptr;
8740     case OMPD_target_teams_distribute:
8741     case OMPD_target_teams_distribute_simd:
8742     case OMPD_target_teams_distribute_parallel_for:
8743     case OMPD_target_teams_distribute_parallel_for_simd:
8744     case OMPD_parallel:
8745     case OMPD_for:
8746     case OMPD_parallel_for:
8747     case OMPD_parallel_sections:
8748     case OMPD_for_simd:
8749     case OMPD_parallel_for_simd:
8750     case OMPD_cancel:
8751     case OMPD_cancellation_point:
8752     case OMPD_ordered:
8753     case OMPD_threadprivate:
8754     case OMPD_allocate:
8755     case OMPD_task:
8756     case OMPD_simd:
8757     case OMPD_sections:
8758     case OMPD_section:
8759     case OMPD_single:
8760     case OMPD_master:
8761     case OMPD_critical:
8762     case OMPD_taskyield:
8763     case OMPD_barrier:
8764     case OMPD_taskwait:
8765     case OMPD_taskgroup:
8766     case OMPD_atomic:
8767     case OMPD_flush:
8768     case OMPD_teams:
8769     case OMPD_target_data:
8770     case OMPD_target_exit_data:
8771     case OMPD_target_enter_data:
8772     case OMPD_distribute:
8773     case OMPD_distribute_simd:
8774     case OMPD_distribute_parallel_for:
8775     case OMPD_distribute_parallel_for_simd:
8776     case OMPD_teams_distribute:
8777     case OMPD_teams_distribute_simd:
8778     case OMPD_teams_distribute_parallel_for:
8779     case OMPD_teams_distribute_parallel_for_simd:
8780     case OMPD_target_update:
8781     case OMPD_declare_simd:
8782     case OMPD_declare_variant:
8783     case OMPD_declare_target:
8784     case OMPD_end_declare_target:
8785     case OMPD_declare_reduction:
8786     case OMPD_declare_mapper:
8787     case OMPD_taskloop:
8788     case OMPD_taskloop_simd:
8789     case OMPD_master_taskloop:
8790     case OMPD_parallel_master_taskloop:
8791     case OMPD_requires:
8792     case OMPD_unknown:
8793       llvm_unreachable("Unexpected directive.");
8794     }
8795   }
8796 
8797   return nullptr;
8798 }
8799 
8800 /// Emit the user-defined mapper function. The code generation follows the
8801 /// pattern in the example below.
8802 /// \code
8803 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8804 ///                                           void *base, void *begin,
8805 ///                                           int64_t size, int64_t type) {
8806 ///   // Allocate space for an array section first.
8807 ///   if (size > 1 && !maptype.IsDelete)
8808 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8809 ///                                 size*sizeof(Ty), clearToFrom(type));
8810 ///   // Map members.
8811 ///   for (unsigned i = 0; i < size; i++) {
8812 ///     // For each component specified by this mapper:
8813 ///     for (auto c : all_components) {
8814 ///       if (c.hasMapper())
8815 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8816 ///                       c.arg_type);
8817 ///       else
8818 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
8819 ///                                     c.arg_begin, c.arg_size, c.arg_type);
8820 ///     }
8821 ///   }
8822 ///   // Delete the array section.
8823 ///   if (size > 1 && maptype.IsDelete)
8824 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8825 ///                                 size*sizeof(Ty), clearToFrom(type));
8826 /// }
8827 /// \endcode
8828 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
8829                                             CodeGenFunction *CGF) {
8830   if (UDMMap.count(D) > 0)
8831     return;
8832   ASTContext &C = CGM.getContext();
8833   QualType Ty = D->getType();
8834   QualType PtrTy = C.getPointerType(Ty).withRestrict();
8835   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
8836   auto *MapperVarDecl =
8837       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
8838   SourceLocation Loc = D->getLocation();
8839   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
8840 
8841   // Prepare mapper function arguments and attributes.
8842   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8843                               C.VoidPtrTy, ImplicitParamDecl::Other);
8844   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
8845                             ImplicitParamDecl::Other);
8846   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8847                              C.VoidPtrTy, ImplicitParamDecl::Other);
8848   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8849                             ImplicitParamDecl::Other);
8850   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8851                             ImplicitParamDecl::Other);
8852   FunctionArgList Args;
8853   Args.push_back(&HandleArg);
8854   Args.push_back(&BaseArg);
8855   Args.push_back(&BeginArg);
8856   Args.push_back(&SizeArg);
8857   Args.push_back(&TypeArg);
8858   const CGFunctionInfo &FnInfo =
8859       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
8860   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
8861   SmallString<64> TyStr;
8862   llvm::raw_svector_ostream Out(TyStr);
8863   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
8864   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
8865   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
8866                                     Name, &CGM.getModule());
8867   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
8868   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
8869   // Start the mapper function code generation.
8870   CodeGenFunction MapperCGF(CGM);
8871   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
8872   // Compute the starting and end addreses of array elements.
8873   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
8874       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
8875       C.getPointerType(Int64Ty), Loc);
8876   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
8877       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
8878       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
8879   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
8880   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
8881       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
8882       C.getPointerType(Int64Ty), Loc);
8883   // Prepare common arguments for array initiation and deletion.
8884   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
8885       MapperCGF.GetAddrOfLocalVar(&HandleArg),
8886       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8887   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
8888       MapperCGF.GetAddrOfLocalVar(&BaseArg),
8889       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8890   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
8891       MapperCGF.GetAddrOfLocalVar(&BeginArg),
8892       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8893 
8894   // Emit array initiation if this is an array section and \p MapType indicates
8895   // that memory allocation is required.
8896   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
8897   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
8898                              ElementSize, HeadBB, /*IsInit=*/true);
8899 
8900   // Emit a for loop to iterate through SizeArg of elements and map all of them.
8901 
8902   // Emit the loop header block.
8903   MapperCGF.EmitBlock(HeadBB);
8904   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
8905   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
8906   // Evaluate whether the initial condition is satisfied.
8907   llvm::Value *IsEmpty =
8908       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
8909   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
8910   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
8911 
8912   // Emit the loop body block.
8913   MapperCGF.EmitBlock(BodyBB);
8914   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
8915       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
8916   PtrPHI->addIncoming(PtrBegin, EntryBB);
8917   Address PtrCurrent =
8918       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
8919                           .getAlignment()
8920                           .alignmentOfArrayElement(ElementSize));
8921   // Privatize the declared variable of mapper to be the current array element.
8922   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
8923   Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
8924     return MapperCGF
8925         .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
8926         .getAddress();
8927   });
8928   (void)Scope.Privatize();
8929 
8930   // Get map clause information. Fill up the arrays with all mapped variables.
8931   MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
8932   MappableExprsHandler::MapValuesArrayTy Pointers;
8933   MappableExprsHandler::MapValuesArrayTy Sizes;
8934   MappableExprsHandler::MapFlagsArrayTy MapTypes;
8935   MappableExprsHandler MEHandler(*D, MapperCGF);
8936   MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
8937 
8938   // Call the runtime API __tgt_mapper_num_components to get the number of
8939   // pre-existing components.
8940   llvm::Value *OffloadingArgs[] = {Handle};
8941   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
8942       createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
8943   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
8944       PreviousSize,
8945       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
8946 
8947   // Fill up the runtime mapper handle for all components.
8948   for (unsigned I = 0; I < BasePointers.size(); ++I) {
8949     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
8950         *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
8951     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
8952         Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
8953     llvm::Value *CurSizeArg = Sizes[I];
8954 
8955     // Extract the MEMBER_OF field from the map type.
8956     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
8957     MapperCGF.EmitBlock(MemberBB);
8958     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
8959     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
8960         OriMapType,
8961         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
8962     llvm::BasicBlock *MemberCombineBB =
8963         MapperCGF.createBasicBlock("omp.member.combine");
8964     llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
8965     llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
8966     MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
8967     // Add the number of pre-existing components to the MEMBER_OF field if it
8968     // is valid.
8969     MapperCGF.EmitBlock(MemberCombineBB);
8970     llvm::Value *CombinedMember =
8971         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
8972     // Do nothing if it is not a member of previous components.
8973     MapperCGF.EmitBlock(TypeBB);
8974     llvm::PHINode *MemberMapType =
8975         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
8976     MemberMapType->addIncoming(OriMapType, MemberBB);
8977     MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
8978 
8979     // Combine the map type inherited from user-defined mapper with that
8980     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
8981     // bits of the \a MapType, which is the input argument of the mapper
8982     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
8983     // bits of MemberMapType.
8984     // [OpenMP 5.0], 1.2.6. map-type decay.
8985     //        | alloc |  to   | from  | tofrom | release | delete
8986     // ----------------------------------------------------------
8987     // alloc  | alloc | alloc | alloc | alloc  | release | delete
8988     // to     | alloc |  to   | alloc |   to   | release | delete
8989     // from   | alloc | alloc | from  |  from  | release | delete
8990     // tofrom | alloc |  to   | from  | tofrom | release | delete
8991     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
8992         MapType,
8993         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
8994                                    MappableExprsHandler::OMP_MAP_FROM));
8995     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
8996     llvm::BasicBlock *AllocElseBB =
8997         MapperCGF.createBasicBlock("omp.type.alloc.else");
8998     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
8999     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9000     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9001     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9002     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9003     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9004     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9005     MapperCGF.EmitBlock(AllocBB);
9006     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9007         MemberMapType,
9008         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9009                                      MappableExprsHandler::OMP_MAP_FROM)));
9010     MapperCGF.Builder.CreateBr(EndBB);
9011     MapperCGF.EmitBlock(AllocElseBB);
9012     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9013         LeftToFrom,
9014         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9015     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9016     // In case of to, clear OMP_MAP_FROM.
9017     MapperCGF.EmitBlock(ToBB);
9018     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9019         MemberMapType,
9020         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9021     MapperCGF.Builder.CreateBr(EndBB);
9022     MapperCGF.EmitBlock(ToElseBB);
9023     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9024         LeftToFrom,
9025         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9026     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9027     // In case of from, clear OMP_MAP_TO.
9028     MapperCGF.EmitBlock(FromBB);
9029     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9030         MemberMapType,
9031         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9032     // In case of tofrom, do nothing.
9033     MapperCGF.EmitBlock(EndBB);
9034     llvm::PHINode *CurMapType =
9035         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9036     CurMapType->addIncoming(AllocMapType, AllocBB);
9037     CurMapType->addIncoming(ToMapType, ToBB);
9038     CurMapType->addIncoming(FromMapType, FromBB);
9039     CurMapType->addIncoming(MemberMapType, ToElseBB);
9040 
9041     // TODO: call the corresponding mapper function if a user-defined mapper is
9042     // associated with this map clause.
9043     // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9044     // data structure.
9045     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9046                                      CurSizeArg, CurMapType};
9047     MapperCGF.EmitRuntimeCall(
9048         createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
9049         OffloadingArgs);
9050   }
9051 
9052   // Update the pointer to point to the next element that needs to be mapped,
9053   // and check whether we have mapped all elements.
9054   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9055       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9056   PtrPHI->addIncoming(PtrNext, BodyBB);
9057   llvm::Value *IsDone =
9058       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9059   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9060   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9061 
9062   MapperCGF.EmitBlock(ExitBB);
9063   // Emit array deletion if this is an array section and \p MapType indicates
9064   // that deletion is required.
9065   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9066                              ElementSize, DoneBB, /*IsInit=*/false);
9067 
9068   // Emit the function exit block.
9069   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9070   MapperCGF.FinishFunction();
9071   UDMMap.try_emplace(D, Fn);
9072   if (CGF) {
9073     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9074     Decls.second.push_back(D);
9075   }
9076 }
9077 
9078 /// Emit the array initialization or deletion portion for user-defined mapper
9079 /// code generation. First, it evaluates whether an array section is mapped and
9080 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9081 /// true, and \a MapType indicates to not delete this array, array
9082 /// initialization code is generated. If \a IsInit is false, and \a MapType
9083 /// indicates to not this array, array deletion code is generated.
9084 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9085     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9086     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9087     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9088   StringRef Prefix = IsInit ? ".init" : ".del";
9089 
9090   // Evaluate if this is an array section.
9091   llvm::BasicBlock *IsDeleteBB =
9092       MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete");
9093   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix);
9094   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9095       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9096   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9097 
9098   // Evaluate if we are going to delete this section.
9099   MapperCGF.EmitBlock(IsDeleteBB);
9100   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9101       MapType,
9102       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9103   llvm::Value *DeleteCond;
9104   if (IsInit) {
9105     DeleteCond = MapperCGF.Builder.CreateIsNull(
9106         DeleteBit, "omp.array" + Prefix + ".delete");
9107   } else {
9108     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9109         DeleteBit, "omp.array" + Prefix + ".delete");
9110   }
9111   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9112 
9113   MapperCGF.EmitBlock(BodyBB);
9114   // Get the array size by multiplying element size and element number (i.e., \p
9115   // Size).
9116   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9117       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9118   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9119   // memory allocation/deletion purpose only.
9120   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9121       MapType,
9122       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9123                                    MappableExprsHandler::OMP_MAP_FROM)));
9124   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9125   // data structure.
9126   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9127   MapperCGF.EmitRuntimeCall(
9128       createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
9129 }
9130 
9131 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9132     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9133     llvm::Value *DeviceID,
9134     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9135                                      const OMPLoopDirective &D)>
9136         SizeEmitter) {
9137   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9138   const OMPExecutableDirective *TD = &D;
9139   // Get nested teams distribute kind directive, if any.
9140   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9141     TD = getNestedDistributeDirective(CGM.getContext(), D);
9142   if (!TD)
9143     return;
9144   const auto *LD = cast<OMPLoopDirective>(TD);
9145   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9146                                                      PrePostActionTy &) {
9147     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9148       llvm::Value *Args[] = {DeviceID, NumIterations};
9149       CGF.EmitRuntimeCall(
9150           createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
9151     }
9152   };
9153   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9154 }
9155 
9156 void CGOpenMPRuntime::emitTargetCall(
9157     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9158     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9159     const Expr *Device,
9160     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9161                                      const OMPLoopDirective &D)>
9162         SizeEmitter) {
9163   if (!CGF.HaveInsertPoint())
9164     return;
9165 
9166   assert(OutlinedFn && "Invalid outlined function!");
9167 
9168   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
9169   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9170   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9171   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9172                                             PrePostActionTy &) {
9173     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9174   };
9175   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9176 
9177   CodeGenFunction::OMPTargetDataInfo InputInfo;
9178   llvm::Value *MapTypesArray = nullptr;
9179   // Fill up the pointer arrays and transfer execution to the device.
9180   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
9181                     &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
9182                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9183     // On top of the arrays that were filled up, the target offloading call
9184     // takes as arguments the device id as well as the host pointer. The host
9185     // pointer is used by the runtime library to identify the current target
9186     // region, so it only has to be unique and not necessarily point to
9187     // anything. It could be the pointer to the outlined function that
9188     // implements the target region, but we aren't using that so that the
9189     // compiler doesn't need to keep that, and could therefore inline the host
9190     // function if proven worthwhile during optimization.
9191 
9192     // From this point on, we need to have an ID of the target region defined.
9193     assert(OutlinedFnID && "Invalid outlined function ID!");
9194 
9195     // Emit device ID if any.
9196     llvm::Value *DeviceID;
9197     if (Device) {
9198       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9199                                            CGF.Int64Ty, /*isSigned=*/true);
9200     } else {
9201       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9202     }
9203 
9204     // Emit the number of elements in the offloading arrays.
9205     llvm::Value *PointerNum =
9206         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9207 
9208     // Return value of the runtime offloading call.
9209     llvm::Value *Return;
9210 
9211     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
9212     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
9213 
9214     // Emit tripcount for the target loop-based directive.
9215     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
9216 
9217     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
9218     // The target region is an outlined function launched by the runtime
9219     // via calls __tgt_target() or __tgt_target_teams().
9220     //
9221     // __tgt_target() launches a target region with one team and one thread,
9222     // executing a serial region.  This master thread may in turn launch
9223     // more threads within its team upon encountering a parallel region,
9224     // however, no additional teams can be launched on the device.
9225     //
9226     // __tgt_target_teams() launches a target region with one or more teams,
9227     // each with one or more threads.  This call is required for target
9228     // constructs such as:
9229     //  'target teams'
9230     //  'target' / 'teams'
9231     //  'target teams distribute parallel for'
9232     //  'target parallel'
9233     // and so on.
9234     //
9235     // Note that on the host and CPU targets, the runtime implementation of
9236     // these calls simply call the outlined function without forking threads.
9237     // The outlined functions themselves have runtime calls to
9238     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
9239     // the compiler in emitTeamsCall() and emitParallelCall().
9240     //
9241     // In contrast, on the NVPTX target, the implementation of
9242     // __tgt_target_teams() launches a GPU kernel with the requested number
9243     // of teams and threads so no additional calls to the runtime are required.
9244     if (NumTeams) {
9245       // If we have NumTeams defined this means that we have an enclosed teams
9246       // region. Therefore we also expect to have NumThreads defined. These two
9247       // values should be defined in the presence of a teams directive,
9248       // regardless of having any clauses associated. If the user is using teams
9249       // but no clauses, these two values will be the default that should be
9250       // passed to the runtime library - a 32-bit integer with the value zero.
9251       assert(NumThreads && "Thread limit expression should be available along "
9252                            "with number of teams.");
9253       llvm::Value *OffloadingArgs[] = {DeviceID,
9254                                        OutlinedFnID,
9255                                        PointerNum,
9256                                        InputInfo.BasePointersArray.getPointer(),
9257                                        InputInfo.PointersArray.getPointer(),
9258                                        InputInfo.SizesArray.getPointer(),
9259                                        MapTypesArray,
9260                                        NumTeams,
9261                                        NumThreads};
9262       Return = CGF.EmitRuntimeCall(
9263           createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
9264                                           : OMPRTL__tgt_target_teams),
9265           OffloadingArgs);
9266     } else {
9267       llvm::Value *OffloadingArgs[] = {DeviceID,
9268                                        OutlinedFnID,
9269                                        PointerNum,
9270                                        InputInfo.BasePointersArray.getPointer(),
9271                                        InputInfo.PointersArray.getPointer(),
9272                                        InputInfo.SizesArray.getPointer(),
9273                                        MapTypesArray};
9274       Return = CGF.EmitRuntimeCall(
9275           createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
9276                                           : OMPRTL__tgt_target),
9277           OffloadingArgs);
9278     }
9279 
9280     // Check the error code and execute the host version if required.
9281     llvm::BasicBlock *OffloadFailedBlock =
9282         CGF.createBasicBlock("omp_offload.failed");
9283     llvm::BasicBlock *OffloadContBlock =
9284         CGF.createBasicBlock("omp_offload.cont");
9285     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
9286     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
9287 
9288     CGF.EmitBlock(OffloadFailedBlock);
9289     if (RequiresOuterTask) {
9290       CapturedVars.clear();
9291       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9292     }
9293     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9294     CGF.EmitBranch(OffloadContBlock);
9295 
9296     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
9297   };
9298 
9299   // Notify that the host version must be executed.
9300   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
9301                     RequiresOuterTask](CodeGenFunction &CGF,
9302                                        PrePostActionTy &) {
9303     if (RequiresOuterTask) {
9304       CapturedVars.clear();
9305       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9306     }
9307     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9308   };
9309 
9310   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
9311                           &CapturedVars, RequiresOuterTask,
9312                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
9313     // Fill up the arrays with all the captured variables.
9314     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9315     MappableExprsHandler::MapValuesArrayTy Pointers;
9316     MappableExprsHandler::MapValuesArrayTy Sizes;
9317     MappableExprsHandler::MapFlagsArrayTy MapTypes;
9318 
9319     // Get mappable expression information.
9320     MappableExprsHandler MEHandler(D, CGF);
9321     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9322 
9323     auto RI = CS.getCapturedRecordDecl()->field_begin();
9324     auto CV = CapturedVars.begin();
9325     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9326                                               CE = CS.capture_end();
9327          CI != CE; ++CI, ++RI, ++CV) {
9328       MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
9329       MappableExprsHandler::MapValuesArrayTy CurPointers;
9330       MappableExprsHandler::MapValuesArrayTy CurSizes;
9331       MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
9332       MappableExprsHandler::StructRangeInfoTy PartialStruct;
9333 
9334       // VLA sizes are passed to the outlined region by copy and do not have map
9335       // information associated.
9336       if (CI->capturesVariableArrayType()) {
9337         CurBasePointers.push_back(*CV);
9338         CurPointers.push_back(*CV);
9339         CurSizes.push_back(CGF.Builder.CreateIntCast(
9340             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9341         // Copy to the device as an argument. No need to retrieve it.
9342         CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
9343                               MappableExprsHandler::OMP_MAP_TARGET_PARAM |
9344                               MappableExprsHandler::OMP_MAP_IMPLICIT);
9345       } else {
9346         // If we have any information in the map clause, we use it, otherwise we
9347         // just do a default mapping.
9348         MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
9349                                          CurSizes, CurMapTypes, PartialStruct);
9350         if (CurBasePointers.empty())
9351           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
9352                                            CurPointers, CurSizes, CurMapTypes);
9353         // Generate correct mapping for variables captured by reference in
9354         // lambdas.
9355         if (CI->capturesVariable())
9356           MEHandler.generateInfoForLambdaCaptures(
9357               CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
9358               CurMapTypes, LambdaPointers);
9359       }
9360       // We expect to have at least an element of information for this capture.
9361       assert(!CurBasePointers.empty() &&
9362              "Non-existing map pointer for capture!");
9363       assert(CurBasePointers.size() == CurPointers.size() &&
9364              CurBasePointers.size() == CurSizes.size() &&
9365              CurBasePointers.size() == CurMapTypes.size() &&
9366              "Inconsistent map information sizes!");
9367 
9368       // If there is an entry in PartialStruct it means we have a struct with
9369       // individual members mapped. Emit an extra combined entry.
9370       if (PartialStruct.Base.isValid())
9371         MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
9372                                     CurMapTypes, PartialStruct);
9373 
9374       // We need to append the results of this capture to what we already have.
9375       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
9376       Pointers.append(CurPointers.begin(), CurPointers.end());
9377       Sizes.append(CurSizes.begin(), CurSizes.end());
9378       MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
9379     }
9380     // Adjust MEMBER_OF flags for the lambdas captures.
9381     MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
9382                                               Pointers, MapTypes);
9383     // Map other list items in the map clause which are not captured variables
9384     // but "declare target link" global variables.
9385     MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
9386                                                MapTypes);
9387 
9388     TargetDataInfo Info;
9389     // Fill up the arrays and create the arguments.
9390     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9391     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
9392                                  Info.PointersArray, Info.SizesArray,
9393                                  Info.MapTypesArray, Info);
9394     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9395     InputInfo.BasePointersArray =
9396         Address(Info.BasePointersArray, CGM.getPointerAlign());
9397     InputInfo.PointersArray =
9398         Address(Info.PointersArray, CGM.getPointerAlign());
9399     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
9400     MapTypesArray = Info.MapTypesArray;
9401     if (RequiresOuterTask)
9402       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9403     else
9404       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9405   };
9406 
9407   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
9408                              CodeGenFunction &CGF, PrePostActionTy &) {
9409     if (RequiresOuterTask) {
9410       CodeGenFunction::OMPTargetDataInfo InputInfo;
9411       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9412     } else {
9413       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9414     }
9415   };
9416 
  // If we have a target function ID it means that we need to support
  // offloading; otherwise, just execute on the host. We need to execute on
  // the host regardless of the condition in the if clause if, e.g., the user
  // does not specify target triples.
9421   if (OutlinedFnID) {
9422     if (IfCond) {
9423       emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9424     } else {
9425       RegionCodeGenTy ThenRCG(TargetThenGen);
9426       ThenRCG(CGF);
9427     }
9428   } else {
9429     RegionCodeGenTy ElseRCG(TargetElseGen);
9430     ElseRCG(CGF);
9431   }
9432 }
9433 
9434 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9435                                                     StringRef ParentName) {
9436   if (!S)
9437     return;
9438 
9439   // Codegen OMP target directives that offload compute to the device.
9440   bool RequiresDeviceCodegen =
9441       isa<OMPExecutableDirective>(S) &&
9442       isOpenMPTargetExecutionDirective(
9443           cast<OMPExecutableDirective>(S)->getDirectiveKind());
9444 
9445   if (RequiresDeviceCodegen) {
9446     const auto &E = *cast<OMPExecutableDirective>(S);
9447     unsigned DeviceID;
9448     unsigned FileID;
9449     unsigned Line;
9450     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
9451                              FileID, Line);
9452 
9453     // Is this a target region that should not be emitted as an entry point? If
9454     // so just signal we are done with this target region.
9455     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
9456                                                             ParentName, Line))
9457       return;
9458 
9459     switch (E.getDirectiveKind()) {
9460     case OMPD_target:
9461       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9462                                                    cast<OMPTargetDirective>(E));
9463       break;
9464     case OMPD_target_parallel:
9465       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9466           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9467       break;
9468     case OMPD_target_teams:
9469       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9470           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9471       break;
9472     case OMPD_target_teams_distribute:
9473       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9474           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9475       break;
9476     case OMPD_target_teams_distribute_simd:
9477       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9478           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9479       break;
9480     case OMPD_target_parallel_for:
9481       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9482           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9483       break;
9484     case OMPD_target_parallel_for_simd:
9485       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9486           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9487       break;
9488     case OMPD_target_simd:
9489       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9490           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9491       break;
9492     case OMPD_target_teams_distribute_parallel_for:
9493       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9494           CGM, ParentName,
9495           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9496       break;
9497     case OMPD_target_teams_distribute_parallel_for_simd:
9498       CodeGenFunction::
9499           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9500               CGM, ParentName,
9501               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9502       break;
9503     case OMPD_parallel:
9504     case OMPD_for:
9505     case OMPD_parallel_for:
9506     case OMPD_parallel_sections:
9507     case OMPD_for_simd:
9508     case OMPD_parallel_for_simd:
9509     case OMPD_cancel:
9510     case OMPD_cancellation_point:
9511     case OMPD_ordered:
9512     case OMPD_threadprivate:
9513     case OMPD_allocate:
9514     case OMPD_task:
9515     case OMPD_simd:
9516     case OMPD_sections:
9517     case OMPD_section:
9518     case OMPD_single:
9519     case OMPD_master:
9520     case OMPD_critical:
9521     case OMPD_taskyield:
9522     case OMPD_barrier:
9523     case OMPD_taskwait:
9524     case OMPD_taskgroup:
9525     case OMPD_atomic:
9526     case OMPD_flush:
9527     case OMPD_teams:
9528     case OMPD_target_data:
9529     case OMPD_target_exit_data:
9530     case OMPD_target_enter_data:
9531     case OMPD_distribute:
9532     case OMPD_distribute_simd:
9533     case OMPD_distribute_parallel_for:
9534     case OMPD_distribute_parallel_for_simd:
9535     case OMPD_teams_distribute:
9536     case OMPD_teams_distribute_simd:
9537     case OMPD_teams_distribute_parallel_for:
9538     case OMPD_teams_distribute_parallel_for_simd:
9539     case OMPD_target_update:
9540     case OMPD_declare_simd:
9541     case OMPD_declare_variant:
9542     case OMPD_declare_target:
9543     case OMPD_end_declare_target:
9544     case OMPD_declare_reduction:
9545     case OMPD_declare_mapper:
9546     case OMPD_taskloop:
9547     case OMPD_taskloop_simd:
9548     case OMPD_master_taskloop:
9549     case OMPD_parallel_master_taskloop:
9550     case OMPD_requires:
9551     case OMPD_unknown:
9552       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9553     }
9554     return;
9555   }
9556 
9557   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9558     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9559       return;
9560 
9561     scanForTargetRegionsFunctions(
9562         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9563     return;
9564   }
9565 
9566   // If this is a lambda function, look into its body.
9567   if (const auto *L = dyn_cast<LambdaExpr>(S))
9568     S = L->getBody();
9569 
9570   // Keep looking for target regions recursively.
9571   for (const Stmt *II : S->children())
9572     scanForTargetRegionsFunctions(II, ParentName);
9573 }
9574 
9575 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9576   // If emitting code for the host, we do not process FD here. Instead we do
9577   // the normal code generation.
9578   if (!CGM.getLangOpts().OpenMPIsDevice) {
9579     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9580       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9581           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9582       // Do not emit device_type(nohost) functions for the host.
9583       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9584         return true;
9585     }
9586     return false;
9587   }
9588 
9589   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9590   StringRef Name = CGM.getMangledName(GD);
9591   // Try to detect target regions in the function.
9592   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9593     scanForTargetRegionsFunctions(FD->getBody(), Name);
9594     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9595         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9596     // Do not emit device_type(nohost) functions for the host.
9597     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9598       return true;
9599   }
9600 
9601   // Do not to emit function if it is not marked as declare target.
9602   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9603          AlreadyEmittedTargetFunctions.count(Name) == 0;
9604 }
9605 
9606 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9607   if (!CGM.getLangOpts().OpenMPIsDevice)
9608     return false;
9609 
9610   // Check if there are Ctors/Dtors in this declaration and look for target
9611   // regions in it. We use the complete variant to produce the kernel name
9612   // mangling.
9613   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9614   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9615     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9616       StringRef ParentName =
9617           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9618       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9619     }
9620     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9621       StringRef ParentName =
9622           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9623       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9624     }
9625   }
9626 
9627   // Do not to emit variable if it is not marked as declare target.
9628   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9629       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9630           cast<VarDecl>(GD.getDecl()));
9631   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9632       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9633        HasRequiresUnifiedSharedMemory)) {
9634     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9635     return true;
9636   }
9637   return false;
9638 }
9639 
9640 llvm::Constant *
9641 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9642                                                 const VarDecl *VD) {
9643   assert(VD->getType().isConstant(CGM.getContext()) &&
9644          "Expected constant variable.");
9645   StringRef VarName;
9646   llvm::Constant *Addr;
9647   llvm::GlobalValue::LinkageTypes Linkage;
9648   QualType Ty = VD->getType();
9649   SmallString<128> Buffer;
9650   {
9651     unsigned DeviceID;
9652     unsigned FileID;
9653     unsigned Line;
9654     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9655                              FileID, Line);
9656     llvm::raw_svector_ostream OS(Buffer);
9657     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9658        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9659     VarName = OS.str();
9660   }
9661   Linkage = llvm::GlobalValue::InternalLinkage;
9662   Addr =
9663       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9664                                   getDefaultFirstprivateAddressSpace());
9665   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9666   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9667   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9668   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9669       VarName, Addr, VarSize,
9670       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9671   return Addr;
9672 }
9673 
9674 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9675                                                    llvm::Constant *Addr) {
9676   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9677       !CGM.getLangOpts().OpenMPIsDevice)
9678     return;
9679   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9680       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9681   if (!Res) {
9682     if (CGM.getLangOpts().OpenMPIsDevice) {
9683       // Register non-target variables being emitted in device code (debug info
9684       // may cause this).
9685       StringRef VarName = CGM.getMangledName(VD);
9686       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9687     }
9688     return;
9689   }
9690   // Register declare target variables.
9691   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9692   StringRef VarName;
9693   CharUnits VarSize;
9694   llvm::GlobalValue::LinkageTypes Linkage;
9695 
9696   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9697       !HasRequiresUnifiedSharedMemory) {
9698     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9699     VarName = CGM.getMangledName(VD);
9700     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9701       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9702       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9703     } else {
9704       VarSize = CharUnits::Zero();
9705     }
9706     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9707     // Temp solution to prevent optimizations of the internal variables.
9708     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9709       std::string RefName = getName({VarName, "ref"});
9710       if (!CGM.GetGlobalValue(RefName)) {
9711         llvm::Constant *AddrRef =
9712             getOrCreateInternalVariable(Addr->getType(), RefName);
9713         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9714         GVAddrRef->setConstant(/*Val=*/true);
9715         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9716         GVAddrRef->setInitializer(Addr);
9717         CGM.addCompilerUsedGlobal(GVAddrRef);
9718       }
9719     }
9720   } else {
9721     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9722             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9723              HasRequiresUnifiedSharedMemory)) &&
9724            "Declare target attribute must link or to with unified memory.");
9725     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9726       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9727     else
9728       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9729 
9730     if (CGM.getLangOpts().OpenMPIsDevice) {
9731       VarName = Addr->getName();
9732       Addr = nullptr;
9733     } else {
9734       VarName = getAddrOfDeclareTargetVar(VD).getName();
9735       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9736     }
9737     VarSize = CGM.getPointerSize();
9738     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9739   }
9740 
9741   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9742       VarName, Addr, VarSize, Flags, Linkage);
9743 }
9744 
9745 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9746   if (isa<FunctionDecl>(GD.getDecl()) ||
9747       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9748     return emitTargetFunctions(GD);
9749 
9750   return emitTargetGlobalVariable(GD);
9751 }
9752 
9753 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9754   for (const VarDecl *VD : DeferredGlobalVariables) {
9755     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9756         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9757     if (!Res)
9758       continue;
9759     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9760         !HasRequiresUnifiedSharedMemory) {
9761       CGM.EmitGlobal(VD);
9762     } else {
9763       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9764               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9765                HasRequiresUnifiedSharedMemory)) &&
9766              "Expected link clause or to clause with unified memory.");
9767       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9768     }
9769   }
9770 }
9771 
/// This implementation performs no adjustment: it only checks, in builds with
/// assertions enabled, that \p D is a target execution directive. \p CGF is
/// not used here.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  // Sanity check only; no code is generated by this function.
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
9777 
9778 void CGOpenMPRuntime::checkArchForUnifiedAddressing(
9779     const OMPRequiresDecl *D) {
9780   for (const OMPClause *Clause : D->clauselists()) {
9781     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9782       HasRequiresUnifiedSharedMemory = true;
9783       break;
9784     }
9785   }
9786 }
9787 
9788 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9789                                                        LangAS &AS) {
9790   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9791     return false;
9792   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9793   switch(A->getAllocatorType()) {
9794   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9795   // Not supported, fallback to the default mem space.
9796   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9797   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9798   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9799   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9800   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9801   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9802   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9803     AS = LangAS::Default;
9804     return true;
9805   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9806     llvm_unreachable("Expected predefined allocator for the variables with the "
9807                      "static storage.");
9808   }
9809   return false;
9810 }
9811 
/// \returns the current value of the HasRequiresUnifiedSharedMemory flag, i.e.
/// whether a unified_shared_memory requires clause has been recorded.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
9815 
9816 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9817     CodeGenModule &CGM)
9818     : CGM(CGM) {
9819   if (CGM.getLangOpts().OpenMPIsDevice) {
9820     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9821     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9822   }
9823 }
9824 
9825 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9826   if (CGM.getLangOpts().OpenMPIsDevice)
9827     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9828 }
9829 
9830 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9831   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9832     return true;
9833 
9834   StringRef Name = CGM.getMangledName(GD);
9835   const auto *D = cast<FunctionDecl>(GD.getDecl());
9836   // Do not to emit function if it is marked as declare target as it was already
9837   // emitted.
9838   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9839     if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
9840       if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
9841         return !F->isDeclaration();
9842       return false;
9843     }
9844     return true;
9845   }
9846 
9847   return !AlreadyEmittedTargetFunctions.insert(Name).second;
9848 }
9849 
9850 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
9851   // If we don't have entries or if we are emitting code for the device, we
9852   // don't need to do anything.
9853   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
9854       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
9855       (OffloadEntriesInfoManager.empty() &&
9856        !HasEmittedDeclareTargetRegion &&
9857        !HasEmittedTargetRegion))
9858     return nullptr;
9859 
9860   // Create and register the function that handles the requires directives.
9861   ASTContext &C = CGM.getContext();
9862 
9863   llvm::Function *RequiresRegFn;
9864   {
9865     CodeGenFunction CGF(CGM);
9866     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
9867     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
9868     std::string ReqName = getName({"omp_offloading", "requires_reg"});
9869     RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
9870     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
9871     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
9872     // TODO: check for other requires clauses.
9873     // The requires directive takes effect only when a target region is
9874     // present in the compilation unit. Otherwise it is ignored and not
9875     // passed to the runtime. This avoids the runtime from throwing an error
9876     // for mismatching requires clauses across compilation units that don't
9877     // contain at least 1 target region.
9878     assert((HasEmittedTargetRegion ||
9879             HasEmittedDeclareTargetRegion ||
9880             !OffloadEntriesInfoManager.empty()) &&
9881            "Target or declare target region expected.");
9882     if (HasRequiresUnifiedSharedMemory)
9883       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
9884     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
9885         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
9886     CGF.FinishFunction();
9887   }
9888   return RequiresRegFn;
9889 }
9890 
9891 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9892                                     const OMPExecutableDirective &D,
9893                                     SourceLocation Loc,
9894                                     llvm::Function *OutlinedFn,
9895                                     ArrayRef<llvm::Value *> CapturedVars) {
9896   if (!CGF.HaveInsertPoint())
9897     return;
9898 
9899   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9900   CodeGenFunction::RunCleanupsScope Scope(CGF);
9901 
9902   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9903   llvm::Value *Args[] = {
9904       RTLoc,
9905       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9906       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9907   llvm::SmallVector<llvm::Value *, 16> RealArgs;
9908   RealArgs.append(std::begin(Args), std::end(Args));
9909   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9910 
9911   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9912   CGF.EmitRuntimeCall(RTLFn, RealArgs);
9913 }
9914 
9915 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9916                                          const Expr *NumTeams,
9917                                          const Expr *ThreadLimit,
9918                                          SourceLocation Loc) {
9919   if (!CGF.HaveInsertPoint())
9920     return;
9921 
9922   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9923 
9924   llvm::Value *NumTeamsVal =
9925       NumTeams
9926           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9927                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9928           : CGF.Builder.getInt32(0);
9929 
9930   llvm::Value *ThreadLimitVal =
9931       ThreadLimit
9932           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9933                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9934           : CGF.Builder.getInt32(0);
9935 
9936   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
9937   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9938                                      ThreadLimitVal};
9939   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
9940                       PushNumTeamsArgs);
9941 }
9942 
9943 void CGOpenMPRuntime::emitTargetDataCalls(
9944     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
9945     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
9946   if (!CGF.HaveInsertPoint())
9947     return;
9948 
9949   // Action used to replace the default codegen action and turn privatization
9950   // off.
9951   PrePostActionTy NoPrivAction;
9952 
9953   // Generate the code for the opening of the data environment. Capture all the
9954   // arguments of the runtime call by reference because they are used in the
9955   // closing of the region.
9956   auto &&BeginThenGen = [this, &D, Device, &Info,
9957                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
9958     // Fill up the arrays with all the mapped variables.
9959     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9960     MappableExprsHandler::MapValuesArrayTy Pointers;
9961     MappableExprsHandler::MapValuesArrayTy Sizes;
9962     MappableExprsHandler::MapFlagsArrayTy MapTypes;
9963 
9964     // Get map clause information.
9965     MappableExprsHandler MCHandler(D, CGF);
9966     MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
9967 
9968     // Fill up the arrays and create the arguments.
9969     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9970 
9971     llvm::Value *BasePointersArrayArg = nullptr;
9972     llvm::Value *PointersArrayArg = nullptr;
9973     llvm::Value *SizesArrayArg = nullptr;
9974     llvm::Value *MapTypesArrayArg = nullptr;
9975     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
9976                                  SizesArrayArg, MapTypesArrayArg, Info);
9977 
9978     // Emit device ID if any.
9979     llvm::Value *DeviceID = nullptr;
9980     if (Device) {
9981       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9982                                            CGF.Int64Ty, /*isSigned=*/true);
9983     } else {
9984       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9985     }
9986 
9987     // Emit the number of elements in the offloading arrays.
9988     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
9989 
9990     llvm::Value *OffloadingArgs[] = {
9991         DeviceID,         PointerNum,    BasePointersArrayArg,
9992         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
9993     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
9994                         OffloadingArgs);
9995 
9996     // If device pointer privatization is required, emit the body of the region
9997     // here. It will have to be duplicated: with and without privatization.
9998     if (!Info.CaptureDeviceAddrMap.empty())
9999       CodeGen(CGF);
10000   };
10001 
10002   // Generate code for the closing of the data region.
10003   auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
10004                                             PrePostActionTy &) {
10005     assert(Info.isValid() && "Invalid data environment closing arguments.");
10006 
10007     llvm::Value *BasePointersArrayArg = nullptr;
10008     llvm::Value *PointersArrayArg = nullptr;
10009     llvm::Value *SizesArrayArg = nullptr;
10010     llvm::Value *MapTypesArrayArg = nullptr;
10011     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10012                                  SizesArrayArg, MapTypesArrayArg, Info);
10013 
10014     // Emit device ID if any.
10015     llvm::Value *DeviceID = nullptr;
10016     if (Device) {
10017       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10018                                            CGF.Int64Ty, /*isSigned=*/true);
10019     } else {
10020       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10021     }
10022 
10023     // Emit the number of elements in the offloading arrays.
10024     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10025 
10026     llvm::Value *OffloadingArgs[] = {
10027         DeviceID,         PointerNum,    BasePointersArrayArg,
10028         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10029     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
10030                         OffloadingArgs);
10031   };
10032 
10033   // If we need device pointer privatization, we need to emit the body of the
10034   // region with no privatization in the 'else' branch of the conditional.
10035   // Otherwise, we don't have to do anything.
10036   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
10037                                                          PrePostActionTy &) {
10038     if (!Info.CaptureDeviceAddrMap.empty()) {
10039       CodeGen.setAction(NoPrivAction);
10040       CodeGen(CGF);
10041     }
10042   };
10043 
10044   // We don't have to do anything to close the region if the if clause evaluates
10045   // to false.
10046   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
10047 
10048   if (IfCond) {
10049     emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
10050   } else {
10051     RegionCodeGenTy RCG(BeginThenGen);
10052     RCG(CGF);
10053   }
10054 
10055   // If we don't require privatization of device pointers, we emit the body in
10056   // between the runtime calls. This avoids duplicating the body code.
10057   if (Info.CaptureDeviceAddrMap.empty()) {
10058     CodeGen.setAction(NoPrivAction);
10059     CodeGen(CGF);
10060   }
10061 
10062   if (IfCond) {
10063     emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
10064   } else {
10065     RegionCodeGenTy RCG(EndThenGen);
10066     RCG(CGF);
10067   }
10068 }
10069 
10070 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10071     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10072     const Expr *Device) {
10073   if (!CGF.HaveInsertPoint())
10074     return;
10075 
10076   assert((isa<OMPTargetEnterDataDirective>(D) ||
10077           isa<OMPTargetExitDataDirective>(D) ||
10078           isa<OMPTargetUpdateDirective>(D)) &&
10079          "Expecting either target enter, exit data, or update directives.");
10080 
10081   CodeGenFunction::OMPTargetDataInfo InputInfo;
10082   llvm::Value *MapTypesArray = nullptr;
10083   // Generate the code for the opening of the data environment.
10084   auto &&ThenGen = [this, &D, Device, &InputInfo,
10085                     &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10086     // Emit device ID if any.
10087     llvm::Value *DeviceID = nullptr;
10088     if (Device) {
10089       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10090                                            CGF.Int64Ty, /*isSigned=*/true);
10091     } else {
10092       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10093     }
10094 
10095     // Emit the number of elements in the offloading arrays.
10096     llvm::Constant *PointerNum =
10097         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10098 
10099     llvm::Value *OffloadingArgs[] = {DeviceID,
10100                                      PointerNum,
10101                                      InputInfo.BasePointersArray.getPointer(),
10102                                      InputInfo.PointersArray.getPointer(),
10103                                      InputInfo.SizesArray.getPointer(),
10104                                      MapTypesArray};
10105 
10106     // Select the right runtime function call for each expected standalone
10107     // directive.
10108     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10109     OpenMPRTLFunction RTLFn;
10110     switch (D.getDirectiveKind()) {
10111     case OMPD_target_enter_data:
10112       RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
10113                         : OMPRTL__tgt_target_data_begin;
10114       break;
10115     case OMPD_target_exit_data:
10116       RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
10117                         : OMPRTL__tgt_target_data_end;
10118       break;
10119     case OMPD_target_update:
10120       RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
10121                         : OMPRTL__tgt_target_data_update;
10122       break;
10123     case OMPD_parallel:
10124     case OMPD_for:
10125     case OMPD_parallel_for:
10126     case OMPD_parallel_sections:
10127     case OMPD_for_simd:
10128     case OMPD_parallel_for_simd:
10129     case OMPD_cancel:
10130     case OMPD_cancellation_point:
10131     case OMPD_ordered:
10132     case OMPD_threadprivate:
10133     case OMPD_allocate:
10134     case OMPD_task:
10135     case OMPD_simd:
10136     case OMPD_sections:
10137     case OMPD_section:
10138     case OMPD_single:
10139     case OMPD_master:
10140     case OMPD_critical:
10141     case OMPD_taskyield:
10142     case OMPD_barrier:
10143     case OMPD_taskwait:
10144     case OMPD_taskgroup:
10145     case OMPD_atomic:
10146     case OMPD_flush:
10147     case OMPD_teams:
10148     case OMPD_target_data:
10149     case OMPD_distribute:
10150     case OMPD_distribute_simd:
10151     case OMPD_distribute_parallel_for:
10152     case OMPD_distribute_parallel_for_simd:
10153     case OMPD_teams_distribute:
10154     case OMPD_teams_distribute_simd:
10155     case OMPD_teams_distribute_parallel_for:
10156     case OMPD_teams_distribute_parallel_for_simd:
10157     case OMPD_declare_simd:
10158     case OMPD_declare_variant:
10159     case OMPD_declare_target:
10160     case OMPD_end_declare_target:
10161     case OMPD_declare_reduction:
10162     case OMPD_declare_mapper:
10163     case OMPD_taskloop:
10164     case OMPD_taskloop_simd:
10165     case OMPD_master_taskloop:
10166     case OMPD_parallel_master_taskloop:
10167     case OMPD_target:
10168     case OMPD_target_simd:
10169     case OMPD_target_teams_distribute:
10170     case OMPD_target_teams_distribute_simd:
10171     case OMPD_target_teams_distribute_parallel_for:
10172     case OMPD_target_teams_distribute_parallel_for_simd:
10173     case OMPD_target_teams:
10174     case OMPD_target_parallel:
10175     case OMPD_target_parallel_for:
10176     case OMPD_target_parallel_for_simd:
10177     case OMPD_requires:
10178     case OMPD_unknown:
10179       llvm_unreachable("Unexpected standalone target data directive.");
10180       break;
10181     }
10182     CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
10183   };
10184 
10185   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
10186                              CodeGenFunction &CGF, PrePostActionTy &) {
10187     // Fill up the arrays with all the mapped variables.
10188     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
10189     MappableExprsHandler::MapValuesArrayTy Pointers;
10190     MappableExprsHandler::MapValuesArrayTy Sizes;
10191     MappableExprsHandler::MapFlagsArrayTy MapTypes;
10192 
10193     // Get map clause information.
10194     MappableExprsHandler MEHandler(D, CGF);
10195     MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
10196 
10197     TargetDataInfo Info;
10198     // Fill up the arrays and create the arguments.
10199     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
10200     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
10201                                  Info.PointersArray, Info.SizesArray,
10202                                  Info.MapTypesArray, Info);
10203     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10204     InputInfo.BasePointersArray =
10205         Address(Info.BasePointersArray, CGM.getPointerAlign());
10206     InputInfo.PointersArray =
10207         Address(Info.PointersArray, CGM.getPointerAlign());
10208     InputInfo.SizesArray =
10209         Address(Info.SizesArray, CGM.getPointerAlign());
10210     MapTypesArray = Info.MapTypesArray;
10211     if (D.hasClausesOfKind<OMPDependClause>())
10212       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10213     else
10214       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10215   };
10216 
10217   if (IfCond) {
10218     emitOMPIfClause(CGF, IfCond, TargetThenGen,
10219                     [](CodeGenFunction &CGF, PrePostActionTy &) {});
10220   } else {
10221     RegionCodeGenTy ThenRCG(TargetThenGen);
10222     ThenRCG(CGF);
10223   }
10224 }
10225 
10226 namespace {
10227   /// Kind of parameter in a function with 'declare simd' directive.
10228   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
10229   /// Attribute set of the parameter.
10230   struct ParamAttrTy {
10231     ParamKindTy Kind = Vector;
10232     llvm::APSInt StrideOrArg;
10233     llvm::APSInt Alignment;
10234   };
10235 } // namespace
10236 
10237 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10238                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10239   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10240   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10241   // of that clause. The VLEN value must be power of 2.
10242   // In other case the notion of the function`s "characteristic data type" (CDT)
10243   // is used to compute the vector length.
10244   // CDT is defined in the following order:
10245   //   a) For non-void function, the CDT is the return type.
10246   //   b) If the function has any non-uniform, non-linear parameters, then the
10247   //   CDT is the type of the first such parameter.
10248   //   c) If the CDT determined by a) or b) above is struct, union, or class
10249   //   type which is pass-by-value (except for the type that maps to the
10250   //   built-in complex data type), the characteristic data type is int.
10251   //   d) If none of the above three cases is applicable, the CDT is int.
10252   // The VLEN is then determined based on the CDT and the size of vector
10253   // register of that ISA for which current vector version is generated. The
10254   // VLEN is computed using the formula below:
10255   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10256   // where vector register size specified in section 3.2.1 Registers and the
10257   // Stack Frame of original AMD64 ABI document.
10258   QualType RetType = FD->getReturnType();
10259   if (RetType.isNull())
10260     return 0;
10261   ASTContext &C = FD->getASTContext();
10262   QualType CDT;
10263   if (!RetType.isNull() && !RetType->isVoidType()) {
10264     CDT = RetType;
10265   } else {
10266     unsigned Offset = 0;
10267     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10268       if (ParamAttrs[Offset].Kind == Vector)
10269         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10270       ++Offset;
10271     }
10272     if (CDT.isNull()) {
10273       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10274         if (ParamAttrs[I + Offset].Kind == Vector) {
10275           CDT = FD->getParamDecl(I)->getType();
10276           break;
10277         }
10278       }
10279     }
10280   }
10281   if (CDT.isNull())
10282     CDT = C.IntTy;
10283   CDT = CDT->getCanonicalTypeUnqualified();
10284   if (CDT->isRecordType() || CDT->isUnionType())
10285     CDT = C.IntTy;
10286   return C.getTypeSize(CDT);
10287 }
10288 
10289 static void
10290 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10291                            const llvm::APSInt &VLENVal,
10292                            ArrayRef<ParamAttrTy> ParamAttrs,
10293                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10294   struct ISADataTy {
10295     char ISA;
10296     unsigned VecRegSize;
10297   };
10298   ISADataTy ISAData[] = {
10299       {
10300           'b', 128
10301       }, // SSE
10302       {
10303           'c', 256
10304       }, // AVX
10305       {
10306           'd', 256
10307       }, // AVX2
10308       {
10309           'e', 512
10310       }, // AVX512
10311   };
10312   llvm::SmallVector<char, 2> Masked;
10313   switch (State) {
10314   case OMPDeclareSimdDeclAttr::BS_Undefined:
10315     Masked.push_back('N');
10316     Masked.push_back('M');
10317     break;
10318   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10319     Masked.push_back('N');
10320     break;
10321   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10322     Masked.push_back('M');
10323     break;
10324   }
10325   for (char Mask : Masked) {
10326     for (const ISADataTy &Data : ISAData) {
10327       SmallString<256> Buffer;
10328       llvm::raw_svector_ostream Out(Buffer);
10329       Out << "_ZGV" << Data.ISA << Mask;
10330       if (!VLENVal) {
10331         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10332         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10333         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10334       } else {
10335         Out << VLENVal;
10336       }
10337       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10338         switch (ParamAttr.Kind){
10339         case LinearWithVarStride:
10340           Out << 's' << ParamAttr.StrideOrArg;
10341           break;
10342         case Linear:
10343           Out << 'l';
10344           if (!!ParamAttr.StrideOrArg)
10345             Out << ParamAttr.StrideOrArg;
10346           break;
10347         case Uniform:
10348           Out << 'u';
10349           break;
10350         case Vector:
10351           Out << 'v';
10352           break;
10353         }
10354         if (!!ParamAttr.Alignment)
10355           Out << 'a' << ParamAttr.Alignment;
10356       }
10357       Out << '_' << Fn->getName();
10358       Fn->addFnAttr(Out.str());
10359     }
10360   }
10361 }
10362 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10368 
10369 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10370 ///
10371 /// TODO: Need to implement the behavior for reference marked with a
10372 /// var or no linear modifiers (1.b in the section). For this, we
10373 /// need to extend ParamKindTy to support the linear modifiers.
10374 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10375   QT = QT.getCanonicalType();
10376 
10377   if (QT->isVoidType())
10378     return false;
10379 
10380   if (Kind == ParamKindTy::Uniform)
10381     return false;
10382 
10383   if (Kind == ParamKindTy::Linear)
10384     return false;
10385 
10386   // TODO: Handle linear references with modifiers
10387 
10388   if (Kind == ParamKindTy::LinearWithVarStride)
10389     return false;
10390 
10391   return true;
10392 }
10393 
10394 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10395 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10396   QT = QT.getCanonicalType();
10397   unsigned Size = C.getTypeSize(QT);
10398 
10399   // Only scalars and complex within 16 bytes wide set PVB to true.
10400   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10401     return false;
10402 
10403   if (QT->isFloatingType())
10404     return true;
10405 
10406   if (QT->isIntegerType())
10407     return true;
10408 
10409   if (QT->isPointerType())
10410     return true;
10411 
10412   // TODO: Add support for complex types (section 3.1.2, item 2).
10413 
10414   return false;
10415 }
10416 
10417 /// Computes the lane size (LS) of a return type or of an input parameter,
10418 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10419 /// TODO: Add support for references, section 3.2.1, item 1.
10420 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10421   if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10422     QualType PTy = QT.getCanonicalType()->getPointeeType();
10423     if (getAArch64PBV(PTy, C))
10424       return C.getTypeSize(PTy);
10425   }
10426   if (getAArch64PBV(QT, C))
10427     return C.getTypeSize(QT);
10428 
10429   return C.getTypeSize(C.getUIntPtrType());
10430 }
10431 
10432 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10433 // signature of the scalar function, as defined in 3.2.2 of the
10434 // AAVFABI.
10435 static std::tuple<unsigned, unsigned, bool>
10436 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10437   QualType RetType = FD->getReturnType().getCanonicalType();
10438 
10439   ASTContext &C = FD->getASTContext();
10440 
10441   bool OutputBecomesInput = false;
10442 
10443   llvm::SmallVector<unsigned, 8> Sizes;
10444   if (!RetType->isVoidType()) {
10445     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10446     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10447       OutputBecomesInput = true;
10448   }
10449   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10450     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10451     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10452   }
10453 
10454   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10455   // The LS of a function parameter / return value can only be a power
10456   // of 2, starting from 8 bits, up to 128.
10457   assert(std::all_of(Sizes.begin(), Sizes.end(),
10458                      [](unsigned Size) {
10459                        return Size == 8 || Size == 16 || Size == 32 ||
10460                               Size == 64 || Size == 128;
10461                      }) &&
10462          "Invalid size");
10463 
10464   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10465                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10466                          OutputBecomesInput);
10467 }
10468 
10469 /// Mangle the parameter part of the vector function name according to
10470 /// their OpenMP classification. The mangling function is defined in
10471 /// section 3.5 of the AAVFABI.
10472 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10473   SmallString<256> Buffer;
10474   llvm::raw_svector_ostream Out(Buffer);
10475   for (const auto &ParamAttr : ParamAttrs) {
10476     switch (ParamAttr.Kind) {
10477     case LinearWithVarStride:
10478       Out << "ls" << ParamAttr.StrideOrArg;
10479       break;
10480     case Linear:
10481       Out << 'l';
10482       // Don't print the step value if it is not present or if it is
10483       // equal to 1.
10484       if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
10485         Out << ParamAttr.StrideOrArg;
10486       break;
10487     case Uniform:
10488       Out << 'u';
10489       break;
10490     case Vector:
10491       Out << 'v';
10492       break;
10493     }
10494 
10495     if (!!ParamAttr.Alignment)
10496       Out << 'a' << ParamAttr.Alignment;
10497   }
10498 
10499   return Out.str();
10500 }
10501 
10502 // Function used to add the attribute. The parameter `VLEN` is
10503 // templated to allow the use of "x" when targeting scalable functions
10504 // for SVE.
10505 template <typename T>
10506 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10507                                  char ISA, StringRef ParSeq,
10508                                  StringRef MangledName, bool OutputBecomesInput,
10509                                  llvm::Function *Fn) {
10510   SmallString<256> Buffer;
10511   llvm::raw_svector_ostream Out(Buffer);
10512   Out << Prefix << ISA << LMask << VLEN;
10513   if (OutputBecomesInput)
10514     Out << "v";
10515   Out << ParSeq << "_" << MangledName;
10516   Fn->addFnAttr(Out.str());
10517 }
10518 
10519 // Helper function to generate the Advanced SIMD names depending on
10520 // the value of the NDS when simdlen is not present.
10521 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10522                                       StringRef Prefix, char ISA,
10523                                       StringRef ParSeq, StringRef MangledName,
10524                                       bool OutputBecomesInput,
10525                                       llvm::Function *Fn) {
10526   switch (NDS) {
10527   case 8:
10528     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10529                          OutputBecomesInput, Fn);
10530     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10531                          OutputBecomesInput, Fn);
10532     break;
10533   case 16:
10534     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10535                          OutputBecomesInput, Fn);
10536     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10537                          OutputBecomesInput, Fn);
10538     break;
10539   case 32:
10540     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10541                          OutputBecomesInput, Fn);
10542     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10543                          OutputBecomesInput, Fn);
10544     break;
10545   case 64:
10546   case 128:
10547     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10548                          OutputBecomesInput, Fn);
10549     break;
10550   default:
10551     llvm_unreachable("Scalar type is too wide.");
10552   }
10553 }
10554 
10555 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10556 static void emitAArch64DeclareSimdFunction(
10557     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10558     ArrayRef<ParamAttrTy> ParamAttrs,
10559     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10560     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10561 
10562   // Get basic data for building the vector signature.
10563   const auto Data = getNDSWDS(FD, ParamAttrs);
10564   const unsigned NDS = std::get<0>(Data);
10565   const unsigned WDS = std::get<1>(Data);
10566   const bool OutputBecomesInput = std::get<2>(Data);
10567 
10568   // Check the values provided via `simdlen` by the user.
10569   // 1. A `simdlen(1)` doesn't produce vector signatures,
10570   if (UserVLEN == 1) {
10571     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10572         DiagnosticsEngine::Warning,
10573         "The clause simdlen(1) has no effect when targeting aarch64.");
10574     CGM.getDiags().Report(SLoc, DiagID);
10575     return;
10576   }
10577 
10578   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10579   // Advanced SIMD output.
10580   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10581     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10582         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10583                                     "power of 2 when targeting Advanced SIMD.");
10584     CGM.getDiags().Report(SLoc, DiagID);
10585     return;
10586   }
10587 
10588   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10589   // limits.
10590   if (ISA == 's' && UserVLEN != 0) {
10591     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10592       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10593           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10594                                       "lanes in the architectural constraints "
10595                                       "for SVE (min is 128-bit, max is "
10596                                       "2048-bit, by steps of 128-bit)");
10597       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10598       return;
10599     }
10600   }
10601 
10602   // Sort out parameter sequence.
10603   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10604   StringRef Prefix = "_ZGV";
10605   // Generate simdlen from user input (if any).
10606   if (UserVLEN) {
10607     if (ISA == 's') {
10608       // SVE generates only a masked function.
10609       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10610                            OutputBecomesInput, Fn);
10611     } else {
10612       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10613       // Advanced SIMD generates one or two functions, depending on
10614       // the `[not]inbranch` clause.
10615       switch (State) {
10616       case OMPDeclareSimdDeclAttr::BS_Undefined:
10617         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10618                              OutputBecomesInput, Fn);
10619         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10620                              OutputBecomesInput, Fn);
10621         break;
10622       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10623         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10624                              OutputBecomesInput, Fn);
10625         break;
10626       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10627         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10628                              OutputBecomesInput, Fn);
10629         break;
10630       }
10631     }
10632   } else {
10633     // If no user simdlen is provided, follow the AAVFABI rules for
10634     // generating the vector length.
10635     if (ISA == 's') {
10636       // SVE, section 3.4.1, item 1.
10637       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10638                            OutputBecomesInput, Fn);
10639     } else {
10640       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10641       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10642       // two vector names depending on the use of the clause
10643       // `[not]inbranch`.
10644       switch (State) {
10645       case OMPDeclareSimdDeclAttr::BS_Undefined:
10646         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10647                                   OutputBecomesInput, Fn);
10648         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10649                                   OutputBecomesInput, Fn);
10650         break;
10651       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10652         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10653                                   OutputBecomesInput, Fn);
10654         break;
10655       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10656         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10657                                   OutputBecomesInput, Fn);
10658         break;
10659       }
10660     }
10661   }
10662 }
10663 
10664 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10665                                               llvm::Function *Fn) {
10666   ASTContext &C = CGM.getContext();
10667   FD = FD->getMostRecentDecl();
10668   // Map params to their positions in function decl.
10669   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10670   if (isa<CXXMethodDecl>(FD))
10671     ParamPositions.try_emplace(FD, 0);
10672   unsigned ParamPos = ParamPositions.size();
10673   for (const ParmVarDecl *P : FD->parameters()) {
10674     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10675     ++ParamPos;
10676   }
10677   while (FD) {
10678     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10679       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10680       // Mark uniform parameters.
10681       for (const Expr *E : Attr->uniforms()) {
10682         E = E->IgnoreParenImpCasts();
10683         unsigned Pos;
10684         if (isa<CXXThisExpr>(E)) {
10685           Pos = ParamPositions[FD];
10686         } else {
10687           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10688                                 ->getCanonicalDecl();
10689           Pos = ParamPositions[PVD];
10690         }
10691         ParamAttrs[Pos].Kind = Uniform;
10692       }
10693       // Get alignment info.
10694       auto NI = Attr->alignments_begin();
10695       for (const Expr *E : Attr->aligneds()) {
10696         E = E->IgnoreParenImpCasts();
10697         unsigned Pos;
10698         QualType ParmTy;
10699         if (isa<CXXThisExpr>(E)) {
10700           Pos = ParamPositions[FD];
10701           ParmTy = E->getType();
10702         } else {
10703           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10704                                 ->getCanonicalDecl();
10705           Pos = ParamPositions[PVD];
10706           ParmTy = PVD->getType();
10707         }
10708         ParamAttrs[Pos].Alignment =
10709             (*NI)
10710                 ? (*NI)->EvaluateKnownConstInt(C)
10711                 : llvm::APSInt::getUnsigned(
10712                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10713                           .getQuantity());
10714         ++NI;
10715       }
10716       // Mark linear parameters.
10717       auto SI = Attr->steps_begin();
10718       auto MI = Attr->modifiers_begin();
10719       for (const Expr *E : Attr->linears()) {
10720         E = E->IgnoreParenImpCasts();
10721         unsigned Pos;
10722         if (isa<CXXThisExpr>(E)) {
10723           Pos = ParamPositions[FD];
10724         } else {
10725           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10726                                 ->getCanonicalDecl();
10727           Pos = ParamPositions[PVD];
10728         }
10729         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10730         ParamAttr.Kind = Linear;
10731         if (*SI) {
10732           Expr::EvalResult Result;
10733           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10734             if (const auto *DRE =
10735                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10736               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10737                 ParamAttr.Kind = LinearWithVarStride;
10738                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10739                     ParamPositions[StridePVD->getCanonicalDecl()]);
10740               }
10741             }
10742           } else {
10743             ParamAttr.StrideOrArg = Result.Val.getInt();
10744           }
10745         }
10746         ++SI;
10747         ++MI;
10748       }
10749       llvm::APSInt VLENVal;
10750       SourceLocation ExprLoc;
10751       const Expr *VLENExpr = Attr->getSimdlen();
10752       if (VLENExpr) {
10753         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10754         ExprLoc = VLENExpr->getExprLoc();
10755       }
10756       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10757       if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
10758           CGM.getTriple().getArch() == llvm::Triple::x86_64) {
10759         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10760       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10761         unsigned VLEN = VLENVal.getExtValue();
10762         StringRef MangledName = Fn->getName();
10763         if (CGM.getTarget().hasFeature("sve"))
10764           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10765                                          MangledName, 's', 128, Fn, ExprLoc);
10766         if (CGM.getTarget().hasFeature("neon"))
10767           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10768                                          MangledName, 'n', 128, Fn, ExprLoc);
10769       }
10770     }
10771     FD = FD->getPreviousDecl();
10772   }
10773 }
10774 
10775 namespace {
10776 /// Cleanup action for doacross support.
10777 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10778 public:
10779   static const int DoacrossFinArgs = 2;
10780 
10781 private:
10782   llvm::FunctionCallee RTLFn;
10783   llvm::Value *Args[DoacrossFinArgs];
10784 
10785 public:
10786   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10787                     ArrayRef<llvm::Value *> CallArgs)
10788       : RTLFn(RTLFn) {
10789     assert(CallArgs.size() == DoacrossFinArgs);
10790     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10791   }
10792   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10793     if (!CGF.HaveInsertPoint())
10794       return;
10795     CGF.EmitRuntimeCall(RTLFn, Args);
10796   }
10797 };
10798 } // namespace
10799 
10800 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
10801                                        const OMPLoopDirective &D,
10802                                        ArrayRef<Expr *> NumIterations) {
10803   if (!CGF.HaveInsertPoint())
10804     return;
10805 
10806   ASTContext &C = CGM.getContext();
10807   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
10808   RecordDecl *RD;
10809   if (KmpDimTy.isNull()) {
10810     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
10811     //  kmp_int64 lo; // lower
10812     //  kmp_int64 up; // upper
10813     //  kmp_int64 st; // stride
10814     // };
10815     RD = C.buildImplicitRecord("kmp_dim");
10816     RD->startDefinition();
10817     addFieldToRecordDecl(C, RD, Int64Ty);
10818     addFieldToRecordDecl(C, RD, Int64Ty);
10819     addFieldToRecordDecl(C, RD, Int64Ty);
10820     RD->completeDefinition();
10821     KmpDimTy = C.getRecordType(RD);
10822   } else {
10823     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
10824   }
10825   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
10826   QualType ArrayTy =
10827       C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
10828 
10829   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
10830   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
10831   enum { LowerFD = 0, UpperFD, StrideFD };
10832   // Fill dims with data.
10833   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
10834     LValue DimsLVal = CGF.MakeAddrLValue(
10835         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
10836     // dims.upper = num_iterations;
10837     LValue UpperLVal = CGF.EmitLValueForField(
10838         DimsLVal, *std::next(RD->field_begin(), UpperFD));
10839     llvm::Value *NumIterVal =
10840         CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
10841                                  D.getNumIterations()->getType(), Int64Ty,
10842                                  D.getNumIterations()->getExprLoc());
10843     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
10844     // dims.stride = 1;
10845     LValue StrideLVal = CGF.EmitLValueForField(
10846         DimsLVal, *std::next(RD->field_begin(), StrideFD));
10847     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
10848                           StrideLVal);
10849   }
10850 
10851   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
10852   // kmp_int32 num_dims, struct kmp_dim * dims);
10853   llvm::Value *Args[] = {
10854       emitUpdateLocation(CGF, D.getBeginLoc()),
10855       getThreadID(CGF, D.getBeginLoc()),
10856       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
10857       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
10858           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
10859           CGM.VoidPtrTy)};
10860 
10861   llvm::FunctionCallee RTLFn =
10862       createRuntimeFunction(OMPRTL__kmpc_doacross_init);
10863   CGF.EmitRuntimeCall(RTLFn, Args);
10864   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
10865       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
10866   llvm::FunctionCallee FiniRTLFn =
10867       createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
10868   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
10869                                              llvm::makeArrayRef(FiniArgs));
10870 }
10871 
10872 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
10873                                           const OMPDependClause *C) {
10874   QualType Int64Ty =
10875       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
10876   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10877   QualType ArrayTy = CGM.getContext().getConstantArrayType(
10878       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
10879   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10880   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10881     const Expr *CounterVal = C->getLoopData(I);
10882     assert(CounterVal);
10883     llvm::Value *CntVal = CGF.EmitScalarConversion(
10884         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10885         CounterVal->getExprLoc());
10886     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
10887                           /*Volatile=*/false, Int64Ty);
10888   }
10889   llvm::Value *Args[] = {
10890       emitUpdateLocation(CGF, C->getBeginLoc()),
10891       getThreadID(CGF, C->getBeginLoc()),
10892       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
10893   llvm::FunctionCallee RTLFn;
10894   if (C->getDependencyKind() == OMPC_DEPEND_source) {
10895     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
10896   } else {
10897     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
10898     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
10899   }
10900   CGF.EmitRuntimeCall(RTLFn, Args);
10901 }
10902 
10903 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
10904                                llvm::FunctionCallee Callee,
10905                                ArrayRef<llvm::Value *> Args) const {
10906   assert(Loc.isValid() && "Outlined function call location must be valid.");
10907   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
10908 
10909   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
10910     if (Fn->doesNotThrow()) {
10911       CGF.EmitNounwindRuntimeCall(Fn, Args);
10912       return;
10913     }
10914   }
10915   CGF.EmitRuntimeCall(Callee, Args);
10916 }
10917 
/// Emits a call to \p OutlinedFn with \p Args at \p Loc by forwarding all
/// arguments unchanged to emitCall.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
10923 
10924 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
10925   if (const auto *FD = dyn_cast<FunctionDecl>(D))
10926     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
10927       HasEmittedDeclareTargetRegion = true;
10928 }
10929 
/// Returns the address of the local variable \p NativeParam as known to
/// \p CGF. \p TargetParam is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
10935 
10936 namespace {
10937 /// Cleanup action for allocate support.
10938 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
10939 public:
10940   static const int CleanupArgs = 3;
10941 
10942 private:
10943   llvm::FunctionCallee RTLFn;
10944   llvm::Value *Args[CleanupArgs];
10945 
10946 public:
10947   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
10948                        ArrayRef<llvm::Value *> CallArgs)
10949       : RTLFn(RTLFn) {
10950     assert(CallArgs.size() == CleanupArgs &&
10951            "Size of arguments does not match.");
10952     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10953   }
10954   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10955     if (!CGF.HaveInsertPoint())
10956       return;
10957     CGF.EmitRuntimeCall(RTLFn, Args);
10958   }
10959 };
10960 } // namespace
10961 
10962 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
10963                                                    const VarDecl *VD) {
10964   if (!VD)
10965     return Address::invalid();
10966   const VarDecl *CVD = VD->getCanonicalDecl();
10967   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
10968     return Address::invalid();
10969   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
10970   // Use the default allocation.
10971   if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
10972       !AA->getAllocator())
10973     return Address::invalid();
10974   llvm::Value *Size;
10975   CharUnits Align = CGM.getContext().getDeclAlign(CVD);
10976   if (CVD->getType()->isVariablyModifiedType()) {
10977     Size = CGF.getTypeSize(CVD->getType());
10978     // Align the size: ((size + align - 1) / align) * align
10979     Size = CGF.Builder.CreateNUWAdd(
10980         Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
10981     Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
10982     Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
10983   } else {
10984     CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
10985     Size = CGM.getSize(Sz.alignTo(Align));
10986   }
10987   llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
10988   assert(AA->getAllocator() &&
10989          "Expected allocator expression for non-default allocator.");
10990   llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
10991   // According to the standard, the original allocator type is a enum (integer).
10992   // Convert to pointer type, if required.
10993   if (Allocator->getType()->isIntegerTy())
10994     Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
10995   else if (Allocator->getType()->isPointerTy())
10996     Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
10997                                                                 CGM.VoidPtrTy);
10998   llvm::Value *Args[] = {ThreadID, Size, Allocator};
10999 
11000   llvm::Value *Addr =
11001       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
11002                           CVD->getName() + ".void.addr");
11003   llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
11004                                                               Allocator};
11005   llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
11006 
11007   CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11008                                                 llvm::makeArrayRef(FiniArgs));
11009   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11010       Addr,
11011       CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
11012       CVD->getName() + ".addr");
11013   return Address(Addr, Align);
11014 }
11015 
/// Checks current context and returns true if it matches the context selector.
///
/// This primary template is what gets used for any selector-set/selector pair
/// that has no explicit specialization: it asserts that neither template
/// argument is the "unknown" enumerator and reports no match. \p A is not
/// inspected.
template <OMPDeclareVariantAttr::CtxSelectorSetType CtxSet,
          OMPDeclareVariantAttr::CtxSelectorType Ctx>
static bool checkContext(const OMPDeclareVariantAttr *A) {
  assert(CtxSet != OMPDeclareVariantAttr::CtxSetUnknown &&
         Ctx != OMPDeclareVariantAttr::CtxUnknown &&
         "Unknown context selector or context selector set.");
  return false;
}
11025 
11026 /// Checks for implementation={vendor(<vendor>)} context selector.
11027 /// \returns true iff <vendor>="llvm", false otherwise.
11028 template <>
11029 bool checkContext<OMPDeclareVariantAttr::CtxSetImplementation,
11030                   OMPDeclareVariantAttr::CtxVendor>(
11031     const OMPDeclareVariantAttr *A) {
11032   return llvm::all_of(A->implVendors(),
11033                       [](StringRef S) { return !S.compare_lower("llvm"); });
11034 }
11035 
11036 static bool greaterCtxScore(ASTContext &Ctx, const Expr *LHS, const Expr *RHS) {
11037   // If both scores are unknown, choose the very first one.
11038   if (!LHS && !RHS)
11039     return true;
11040   // If only one is known, return this one.
11041   if (LHS && !RHS)
11042     return true;
11043   if (!LHS && RHS)
11044     return false;
11045   llvm::APSInt LHSVal = LHS->EvaluateKnownConstInt(Ctx);
11046   llvm::APSInt RHSVal = RHS->EvaluateKnownConstInt(Ctx);
11047   return llvm::APSInt::compareValues(LHSVal, RHSVal) >= 0;
11048 }
11049 
11050 namespace {
11051 /// Comparator for the priority queue for context selector.
11052 class OMPDeclareVariantAttrComparer
11053     : public std::greater<const OMPDeclareVariantAttr *> {
11054 private:
11055   ASTContext &Ctx;
11056 
11057 public:
11058   OMPDeclareVariantAttrComparer(ASTContext &Ctx) : Ctx(Ctx) {}
11059   bool operator()(const OMPDeclareVariantAttr *LHS,
11060                   const OMPDeclareVariantAttr *RHS) const {
11061     const Expr *LHSExpr = nullptr;
11062     const Expr *RHSExpr = nullptr;
11063     if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
11064       LHSExpr = LHS->getScore();
11065     if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
11066       RHSExpr = RHS->getScore();
11067     return greaterCtxScore(Ctx, LHSExpr, RHSExpr);
11068   }
11069 };
11070 } // anonymous namespace
11071 
11072 /// Finds the variant function that matches current context with its context
11073 /// selector.
11074 static const FunctionDecl *getDeclareVariantFunction(ASTContext &Ctx,
11075                                                      const FunctionDecl *FD) {
11076   if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>())
11077     return FD;
11078   // Iterate through all DeclareVariant attributes and check context selectors.
11079   auto &&Comparer = [&Ctx](const OMPDeclareVariantAttr *LHS,
11080                            const OMPDeclareVariantAttr *RHS) {
11081     const Expr *LHSExpr = nullptr;
11082     const Expr *RHSExpr = nullptr;
11083     if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
11084       LHSExpr = LHS->getScore();
11085     if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
11086       RHSExpr = RHS->getScore();
11087     return greaterCtxScore(Ctx, LHSExpr, RHSExpr);
11088   };
11089   const OMPDeclareVariantAttr *TopMostAttr = nullptr;
11090   for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) {
11091     const OMPDeclareVariantAttr *SelectedAttr = nullptr;
11092     switch (A->getCtxSelectorSet()) {
11093     case OMPDeclareVariantAttr::CtxSetImplementation:
11094       switch (A->getCtxSelector()) {
11095       case OMPDeclareVariantAttr::CtxVendor:
11096         if (checkContext<OMPDeclareVariantAttr::CtxSetImplementation,
11097                          OMPDeclareVariantAttr::CtxVendor>(A))
11098           SelectedAttr = A;
11099         break;
11100       case OMPDeclareVariantAttr::CtxUnknown:
11101         llvm_unreachable(
11102             "Unknown context selector in implementation selector set.");
11103       }
11104       break;
11105     case OMPDeclareVariantAttr::CtxSetUnknown:
11106       llvm_unreachable("Unknown context selector set.");
11107     }
11108     // If the attribute matches the context, find the attribute with the highest
11109     // score.
11110     if (SelectedAttr && (!TopMostAttr || !Comparer(TopMostAttr, SelectedAttr)))
11111       TopMostAttr = SelectedAttr;
11112   }
11113   if (!TopMostAttr)
11114     return FD;
11115   return cast<FunctionDecl>(
11116       cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts())
11117           ->getDecl());
11118 }
11119 
11120 bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
11121   const auto *D = cast<FunctionDecl>(GD.getDecl());
11122   // If the original function is defined already, use its definition.
11123   StringRef MangledName = CGM.getMangledName(GD);
11124   llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName);
11125   if (Orig && !Orig->isDeclaration())
11126     return false;
11127   const FunctionDecl *NewFD = getDeclareVariantFunction(CGM.getContext(), D);
11128   // Emit original function if it does not have declare variant attribute or the
11129   // context does not match.
11130   if (NewFD == D)
11131     return false;
11132   GlobalDecl NewGD = GD.getWithDecl(NewFD);
11133   if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) {
11134     DeferredVariantFunction.erase(D);
11135     return true;
11136   }
11137   DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD)));
11138   return true;
11139 }
11140 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used and nothing is returned.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11146 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used and nothing is returned.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11152 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used, \p NumberOfParts is not
/// written, and nothing is returned.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11160 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11168 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11175 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11181 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11186 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11192 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11200 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11207 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11215 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11222 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11228 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11234 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11241 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11247 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used and nothing is returned.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11255 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11261 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11267 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used and nothing is returned.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11274 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used and nothing is returned.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11280 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used and nothing is returned.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11285 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11291 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11300 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11307 
/// SIMD-only variant of emitReduction: asserts that \p Options requests a
/// simple reduction and then forwards every argument unchanged to
/// CGOpenMPRuntime::emitReduction.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
11316 
/// SIMD-only runtime stub: unconditionally reports "Not supported in SIMD-only
/// mode" via llvm_unreachable; no argument is used and nothing is returned.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11322 
11323 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
11324                                                   SourceLocation Loc,
11325                                                   ReductionCodeGen &RCG,
11326                                                   unsigned N) {
11327   llvm_unreachable("Not supported in SIMD-only mode");
11328 }
11329 
11330 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
11331                                                   SourceLocation Loc,
11332                                                   llvm::Value *ReductionsPtr,
11333                                                   LValue SharedLVal) {
11334   llvm_unreachable("Not supported in SIMD-only mode");
11335 }
11336 
11337 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
11338                                            SourceLocation Loc) {
11339   llvm_unreachable("Not supported in SIMD-only mode");
11340 }
11341 
11342 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
11343     CodeGenFunction &CGF, SourceLocation Loc,
11344     OpenMPDirectiveKind CancelRegion) {
11345   llvm_unreachable("Not supported in SIMD-only mode");
11346 }
11347 
11348 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
11349                                          SourceLocation Loc, const Expr *IfCond,
11350                                          OpenMPDirectiveKind CancelRegion) {
11351   llvm_unreachable("Not supported in SIMD-only mode");
11352 }
11353 
11354 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
11355     const OMPExecutableDirective &D, StringRef ParentName,
11356     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
11357     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
11358   llvm_unreachable("Not supported in SIMD-only mode");
11359 }
11360 
11361 void CGOpenMPSIMDRuntime::emitTargetCall(
11362     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11363     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
11364     const Expr *Device,
11365     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
11366                                      const OMPLoopDirective &D)>
11367         SizeEmitter) {
11368   llvm_unreachable("Not supported in SIMD-only mode");
11369 }
11370 
11371 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
11372   llvm_unreachable("Not supported in SIMD-only mode");
11373 }
11374 
11375 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
11376   llvm_unreachable("Not supported in SIMD-only mode");
11377 }
11378 
11379 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
11380   return false;
11381 }
11382 
11383 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
11384                                         const OMPExecutableDirective &D,
11385                                         SourceLocation Loc,
11386                                         llvm::Function *OutlinedFn,
11387                                         ArrayRef<llvm::Value *> CapturedVars) {
11388   llvm_unreachable("Not supported in SIMD-only mode");
11389 }
11390 
11391 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11392                                              const Expr *NumTeams,
11393                                              const Expr *ThreadLimit,
11394                                              SourceLocation Loc) {
11395   llvm_unreachable("Not supported in SIMD-only mode");
11396 }
11397 
11398 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
11399     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11400     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11401   llvm_unreachable("Not supported in SIMD-only mode");
11402 }
11403 
11404 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
11405     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11406     const Expr *Device) {
11407   llvm_unreachable("Not supported in SIMD-only mode");
11408 }
11409 
11410 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11411                                            const OMPLoopDirective &D,
11412                                            ArrayRef<Expr *> NumIterations) {
11413   llvm_unreachable("Not supported in SIMD-only mode");
11414 }
11415 
11416 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11417                                               const OMPDependClause *C) {
11418   llvm_unreachable("Not supported in SIMD-only mode");
11419 }
11420 
11421 const VarDecl *
11422 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
11423                                         const VarDecl *NativeParam) const {
11424   llvm_unreachable("Not supported in SIMD-only mode");
11425 }
11426 
11427 Address
11428 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
11429                                          const VarDecl *NativeParam,
11430                                          const VarDecl *TargetParam) const {
11431   llvm_unreachable("Not supported in SIMD-only mode");
11432 }
11433