1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCXXABI.h"
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/CodeGen/ConstantInitBuilder.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/Basic/BitmaskEnum.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/Format.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 
31 using namespace clang;
32 using namespace CodeGen;
33 
34 namespace {
35 /// Base class for handling code generation inside OpenMP regions.
36 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
37 public:
38   /// Kinds of OpenMP regions used in codegen.
39   enum CGOpenMPRegionKind {
40     /// Region with outlined function for standalone 'parallel'
41     /// directive.
42     ParallelOutlinedRegion,
43     /// Region with outlined function for standalone 'task' directive.
44     TaskOutlinedRegion,
45     /// Region for constructs that do not require function outlining,
46     /// like 'for', 'sections', 'atomic' etc. directives.
47     InlinedRegion,
48     /// Region with outlined function for standalone 'target' directive.
49     TargetRegion,
50   };
51 
52   CGOpenMPRegionInfo(const CapturedStmt &CS,
53                      const CGOpenMPRegionKind RegionKind,
54                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
55                      bool HasCancel)
56       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
57         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
58 
59   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
60                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
61                      bool HasCancel)
62       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
63         Kind(Kind), HasCancel(HasCancel) {}
64 
65   /// Get a variable or parameter for storing global thread id
66   /// inside OpenMP construct.
67   virtual const VarDecl *getThreadIDVariable() const = 0;
68 
69   /// Emit the captured statement body.
70   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
71 
72   /// Get an LValue for the current ThreadID variable.
73   /// \return LValue for thread id variable. This LValue always has type int32*.
74   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
75 
76   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
77 
78   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
79 
80   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
81 
82   bool hasCancel() const { return HasCancel; }
83 
84   static bool classof(const CGCapturedStmtInfo *Info) {
85     return Info->getKind() == CR_OpenMP;
86   }
87 
88   ~CGOpenMPRegionInfo() override = default;
89 
90 protected:
91   CGOpenMPRegionKind RegionKind;
92   RegionCodeGenTy CodeGen;
93   OpenMPDirectiveKind Kind;
94   bool HasCancel;
95 };
96 
97 /// API for captured statement code generation in OpenMP constructs.
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
99 public:
100   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
101                              const RegionCodeGenTy &CodeGen,
102                              OpenMPDirectiveKind Kind, bool HasCancel,
103                              StringRef HelperName)
104       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
105                            HasCancel),
106         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
107     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
108   }
109 
110   /// Get a variable or parameter for storing global thread id
111   /// inside OpenMP construct.
112   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
113 
114   /// Get the name of the capture helper.
115   StringRef getHelperName() const override { return HelperName; }
116 
117   static bool classof(const CGCapturedStmtInfo *Info) {
118     return CGOpenMPRegionInfo::classof(Info) &&
119            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
120                ParallelOutlinedRegion;
121   }
122 
123 private:
124   /// A variable or parameter storing global thread id for OpenMP
125   /// constructs.
126   const VarDecl *ThreadIDVar;
127   StringRef HelperName;
128 };
129 
130 /// API for captured statement code generation in OpenMP constructs.
131 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
132 public:
133   class UntiedTaskActionTy final : public PrePostActionTy {
134     bool Untied;
135     const VarDecl *PartIDVar;
136     const RegionCodeGenTy UntiedCodeGen;
137     llvm::SwitchInst *UntiedSwitch = nullptr;
138 
139   public:
140     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
141                        const RegionCodeGenTy &UntiedCodeGen)
142         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
143     void Enter(CodeGenFunction &CGF) override {
144       if (Untied) {
145         // Emit task switching point.
146         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
147             CGF.GetAddrOfLocalVar(PartIDVar),
148             PartIDVar->getType()->castAs<PointerType>());
149         llvm::Value *Res =
150             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
151         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
152         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
153         CGF.EmitBlock(DoneBB);
154         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
155         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
156         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
157                               CGF.Builder.GetInsertBlock());
158         emitUntiedSwitch(CGF);
159       }
160     }
161     void emitUntiedSwitch(CodeGenFunction &CGF) const {
162       if (Untied) {
163         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
164             CGF.GetAddrOfLocalVar(PartIDVar),
165             PartIDVar->getType()->castAs<PointerType>());
166         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
167                               PartIdLVal);
168         UntiedCodeGen(CGF);
169         CodeGenFunction::JumpDest CurPoint =
170             CGF.getJumpDestInCurrentScope(".untied.next.");
171         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
172         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
173         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
174                               CGF.Builder.GetInsertBlock());
175         CGF.EmitBranchThroughCleanup(CurPoint);
176         CGF.EmitBlock(CurPoint.getBlock());
177       }
178     }
179     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
180   };
181   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
182                                  const VarDecl *ThreadIDVar,
183                                  const RegionCodeGenTy &CodeGen,
184                                  OpenMPDirectiveKind Kind, bool HasCancel,
185                                  const UntiedTaskActionTy &Action)
186       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
187         ThreadIDVar(ThreadIDVar), Action(Action) {
188     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
189   }
190 
191   /// Get a variable or parameter for storing global thread id
192   /// inside OpenMP construct.
193   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
194 
195   /// Get an LValue for the current ThreadID variable.
196   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
197 
198   /// Get the name of the capture helper.
199   StringRef getHelperName() const override { return ".omp_outlined."; }
200 
201   void emitUntiedSwitch(CodeGenFunction &CGF) override {
202     Action.emitUntiedSwitch(CGF);
203   }
204 
205   static bool classof(const CGCapturedStmtInfo *Info) {
206     return CGOpenMPRegionInfo::classof(Info) &&
207            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
208                TaskOutlinedRegion;
209   }
210 
211 private:
212   /// A variable or parameter storing global thread id for OpenMP
213   /// constructs.
214   const VarDecl *ThreadIDVar;
215   /// Action for emitting code for untied tasks.
216   const UntiedTaskActionTy &Action;
217 };
218 
219 /// API for inlined captured statement code generation in OpenMP
220 /// constructs.
221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
222 public:
223   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
224                             const RegionCodeGenTy &CodeGen,
225                             OpenMPDirectiveKind Kind, bool HasCancel)
226       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
227         OldCSI(OldCSI),
228         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
229 
230   // Retrieve the value of the context parameter.
231   llvm::Value *getContextValue() const override {
232     if (OuterRegionInfo)
233       return OuterRegionInfo->getContextValue();
234     llvm_unreachable("No context value for inlined OpenMP region");
235   }
236 
237   void setContextValue(llvm::Value *V) override {
238     if (OuterRegionInfo) {
239       OuterRegionInfo->setContextValue(V);
240       return;
241     }
242     llvm_unreachable("No context value for inlined OpenMP region");
243   }
244 
245   /// Lookup the captured field decl for a variable.
246   const FieldDecl *lookup(const VarDecl *VD) const override {
247     if (OuterRegionInfo)
248       return OuterRegionInfo->lookup(VD);
249     // If there is no outer outlined region,no need to lookup in a list of
250     // captured variables, we can use the original one.
251     return nullptr;
252   }
253 
254   FieldDecl *getThisFieldDecl() const override {
255     if (OuterRegionInfo)
256       return OuterRegionInfo->getThisFieldDecl();
257     return nullptr;
258   }
259 
260   /// Get a variable or parameter for storing global thread id
261   /// inside OpenMP construct.
262   const VarDecl *getThreadIDVariable() const override {
263     if (OuterRegionInfo)
264       return OuterRegionInfo->getThreadIDVariable();
265     return nullptr;
266   }
267 
268   /// Get an LValue for the current ThreadID variable.
269   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
270     if (OuterRegionInfo)
271       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
272     llvm_unreachable("No LValue for inlined OpenMP construct");
273   }
274 
275   /// Get the name of the capture helper.
276   StringRef getHelperName() const override {
277     if (auto *OuterRegionInfo = getOldCSI())
278       return OuterRegionInfo->getHelperName();
279     llvm_unreachable("No helper name for inlined OpenMP construct");
280   }
281 
282   void emitUntiedSwitch(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       OuterRegionInfo->emitUntiedSwitch(CGF);
285   }
286 
287   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
288 
289   static bool classof(const CGCapturedStmtInfo *Info) {
290     return CGOpenMPRegionInfo::classof(Info) &&
291            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
292   }
293 
294   ~CGOpenMPInlinedRegionInfo() override = default;
295 
296 private:
297   /// CodeGen info about outer OpenMP region.
298   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
299   CGOpenMPRegionInfo *OuterRegionInfo;
300 };
301 
302 /// API for captured statement code generation in OpenMP target
303 /// constructs. For this captures, implicit parameters are used instead of the
304 /// captured fields. The name of the target region has to be unique in a given
305 /// application so it is provided by the client, because only the client has
306 /// the information to generate that.
307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
308 public:
309   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
310                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
311       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
312                            /*HasCancel=*/false),
313         HelperName(HelperName) {}
314 
315   /// This is unused for target regions because each starts executing
316   /// with a single thread.
317   const VarDecl *getThreadIDVariable() const override { return nullptr; }
318 
319   /// Get the name of the capture helper.
320   StringRef getHelperName() const override { return HelperName; }
321 
322   static bool classof(const CGCapturedStmtInfo *Info) {
323     return CGOpenMPRegionInfo::classof(Info) &&
324            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
325   }
326 
327 private:
328   StringRef HelperName;
329 };
330 
331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
332   llvm_unreachable("No codegen for expressions");
333 }
334 /// API for generation of expressions captured in a innermost OpenMP
335 /// region.
336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
337 public:
338   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
339       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
340                                   OMPD_unknown,
341                                   /*HasCancel=*/false),
342         PrivScope(CGF) {
343     // Make sure the globals captured in the provided statement are local by
344     // using the privatization logic. We assume the same variable is not
345     // captured more than once.
346     for (const auto &C : CS.captures()) {
347       if (!C.capturesVariable() && !C.capturesVariableByCopy())
348         continue;
349 
350       const VarDecl *VD = C.getCapturedVar();
351       if (VD->isLocalVarDeclOrParm())
352         continue;
353 
354       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
355                       /*RefersToEnclosingVariableOrCapture=*/false,
356                       VD->getType().getNonReferenceType(), VK_LValue,
357                       C.getLocation());
358       PrivScope.addPrivate(
359           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
360     }
361     (void)PrivScope.Privatize();
362   }
363 
364   /// Lookup the captured field decl for a variable.
365   const FieldDecl *lookup(const VarDecl *VD) const override {
366     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
367       return FD;
368     return nullptr;
369   }
370 
371   /// Emit the captured statement body.
372   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
373     llvm_unreachable("No body for expressions");
374   }
375 
376   /// Get a variable or parameter for storing global thread id
377   /// inside OpenMP construct.
378   const VarDecl *getThreadIDVariable() const override {
379     llvm_unreachable("No thread id for expressions");
380   }
381 
382   /// Get the name of the capture helper.
383   StringRef getHelperName() const override {
384     llvm_unreachable("No helper name for expressions");
385   }
386 
387   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
388 
389 private:
390   /// Private scope to capture global variables.
391   CodeGenFunction::OMPPrivateScope PrivScope;
392 };
393 
394 /// RAII for emitting code of OpenMP constructs.
395 class InlinedOpenMPRegionRAII {
396   CodeGenFunction &CGF;
397   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
398   FieldDecl *LambdaThisCaptureField = nullptr;
399   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
400 
401 public:
402   /// Constructs region for combined constructs.
403   /// \param CodeGen Code generation sequence for combined directives. Includes
404   /// a list of functions used for code generation of implicitly inlined
405   /// regions.
406   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
407                           OpenMPDirectiveKind Kind, bool HasCancel)
408       : CGF(CGF) {
409     // Start emission for the construct.
410     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
411         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
412     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
413     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
414     CGF.LambdaThisCaptureField = nullptr;
415     BlockInfo = CGF.BlockInfo;
416     CGF.BlockInfo = nullptr;
417   }
418 
419   ~InlinedOpenMPRegionRAII() {
420     // Restore original CapturedStmtInfo only if we're done with code emission.
421     auto *OldCSI =
422         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
423     delete CGF.CapturedStmtInfo;
424     CGF.CapturedStmtInfo = OldCSI;
425     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
426     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
427     CGF.BlockInfo = BlockInfo;
428   }
429 };
430 
431 /// Values for bit flags used in the ident_t to describe the fields.
432 /// All enumeric elements are named and described in accordance with the code
433 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
434 enum OpenMPLocationFlags : unsigned {
435   /// Use trampoline for internal microtask.
436   OMP_IDENT_IMD = 0x01,
437   /// Use c-style ident structure.
438   OMP_IDENT_KMPC = 0x02,
439   /// Atomic reduction option for kmpc_reduce.
440   OMP_ATOMIC_REDUCE = 0x10,
441   /// Explicit 'barrier' directive.
442   OMP_IDENT_BARRIER_EXPL = 0x20,
443   /// Implicit barrier in code.
444   OMP_IDENT_BARRIER_IMPL = 0x40,
445   /// Implicit barrier in 'for' directive.
446   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
447   /// Implicit barrier in 'sections' directive.
448   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
449   /// Implicit barrier in 'single' directive.
450   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
451   /// Call of __kmp_for_static_init for static loop.
452   OMP_IDENT_WORK_LOOP = 0x200,
453   /// Call of __kmp_for_static_init for sections.
454   OMP_IDENT_WORK_SECTIONS = 0x400,
455   /// Call of __kmp_for_static_init for distribute.
456   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
457   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
458 };
459 
460 namespace {
461 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
462 /// Values for bit flags for marking which requires clauses have been used.
463 enum OpenMPOffloadingRequiresDirFlags : int64_t {
464   /// flag undefined.
465   OMP_REQ_UNDEFINED               = 0x000,
466   /// no requires clause present.
467   OMP_REQ_NONE                    = 0x001,
468   /// reverse_offload clause.
469   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
470   /// unified_address clause.
471   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
472   /// unified_shared_memory clause.
473   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
474   /// dynamic_allocators clause.
475   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
476   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
477 };
478 
479 enum OpenMPOffloadingReservedDeviceIDs {
480   /// Device ID if the device was not defined, runtime should get it
481   /// from environment variables in the spec.
482   OMP_DEVICEID_UNDEF = -1,
483 };
484 } // anonymous namespace
485 
/// Field indices of the ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
///
/// The enumerators below follow the declaration order of the members above.
enum IdentFieldIndex {
  /// Index of reserved_1: might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// Index of flags: OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this
  /// union member.
  IdentField_Flags = 1,
  /// Index of reserved_2: not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// Index of reserved_3: source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// Index of psource: string describing the source location. The string is
  /// composed of semi-colon separated fields which describe the source file,
  /// the function and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
526 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  // Default (unordered) versions.
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Schedule used by default; an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,

  // 'ordered' versions.
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  // dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
558 
559 enum OpenMPRTLFunction {
560   /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
561   /// kmpc_micro microtask, ...);
562   OMPRTL__kmpc_fork_call,
563   /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
564   /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
565   OMPRTL__kmpc_threadprivate_cached,
566   /// Call to void __kmpc_threadprivate_register( ident_t *,
567   /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
568   OMPRTL__kmpc_threadprivate_register,
569   // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
570   OMPRTL__kmpc_global_thread_num,
571   // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
572   // kmp_critical_name *crit);
573   OMPRTL__kmpc_critical,
574   // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
575   // global_tid, kmp_critical_name *crit, uintptr_t hint);
576   OMPRTL__kmpc_critical_with_hint,
577   // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
578   // kmp_critical_name *crit);
579   OMPRTL__kmpc_end_critical,
580   // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
581   // global_tid);
582   OMPRTL__kmpc_cancel_barrier,
583   // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
584   OMPRTL__kmpc_barrier,
585   // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
586   OMPRTL__kmpc_for_static_fini,
587   // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
588   // global_tid);
589   OMPRTL__kmpc_serialized_parallel,
590   // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
591   // global_tid);
592   OMPRTL__kmpc_end_serialized_parallel,
593   // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
594   // kmp_int32 num_threads);
595   OMPRTL__kmpc_push_num_threads,
596   // Call to void __kmpc_flush(ident_t *loc);
597   OMPRTL__kmpc_flush,
598   // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
599   OMPRTL__kmpc_master,
600   // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
601   OMPRTL__kmpc_end_master,
602   // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
603   // int end_part);
604   OMPRTL__kmpc_omp_taskyield,
605   // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
606   OMPRTL__kmpc_single,
607   // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
608   OMPRTL__kmpc_end_single,
609   // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
610   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
611   // kmp_routine_entry_t *task_entry);
612   OMPRTL__kmpc_omp_task_alloc,
613   // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
614   // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
615   // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
616   // kmp_int64 device_id);
617   OMPRTL__kmpc_omp_target_task_alloc,
618   // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
619   // new_task);
620   OMPRTL__kmpc_omp_task,
621   // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
622   // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
623   // kmp_int32 didit);
624   OMPRTL__kmpc_copyprivate,
625   // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
626   // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
627   // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
628   OMPRTL__kmpc_reduce,
629   // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
630   // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
631   // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
632   // *lck);
633   OMPRTL__kmpc_reduce_nowait,
634   // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
635   // kmp_critical_name *lck);
636   OMPRTL__kmpc_end_reduce,
637   // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
638   // kmp_critical_name *lck);
639   OMPRTL__kmpc_end_reduce_nowait,
640   // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
641   // kmp_task_t * new_task);
642   OMPRTL__kmpc_omp_task_begin_if0,
643   // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
644   // kmp_task_t * new_task);
645   OMPRTL__kmpc_omp_task_complete_if0,
646   // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
647   OMPRTL__kmpc_ordered,
648   // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
649   OMPRTL__kmpc_end_ordered,
650   // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
651   // global_tid);
652   OMPRTL__kmpc_omp_taskwait,
653   // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
654   OMPRTL__kmpc_taskgroup,
655   // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
656   OMPRTL__kmpc_end_taskgroup,
657   // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
658   // int proc_bind);
659   OMPRTL__kmpc_push_proc_bind,
660   // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
661   // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
662   // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
663   OMPRTL__kmpc_omp_task_with_deps,
664   // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
665   // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
666   // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
667   OMPRTL__kmpc_omp_wait_deps,
668   // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
669   // global_tid, kmp_int32 cncl_kind);
670   OMPRTL__kmpc_cancellationpoint,
671   // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
672   // kmp_int32 cncl_kind);
673   OMPRTL__kmpc_cancel,
674   // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
675   // kmp_int32 num_teams, kmp_int32 thread_limit);
676   OMPRTL__kmpc_push_num_teams,
677   // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
678   // microtask, ...);
679   OMPRTL__kmpc_fork_teams,
680   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
681   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
682   // sched, kmp_uint64 grainsize, void *task_dup);
683   OMPRTL__kmpc_taskloop,
684   // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
685   // num_dims, struct kmp_dim *dims);
686   OMPRTL__kmpc_doacross_init,
687   // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
688   OMPRTL__kmpc_doacross_fini,
689   // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
690   // *vec);
691   OMPRTL__kmpc_doacross_post,
692   // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
693   // *vec);
694   OMPRTL__kmpc_doacross_wait,
695   // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
696   // *data);
697   OMPRTL__kmpc_task_reduction_init,
698   // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
699   // *d);
700   OMPRTL__kmpc_task_reduction_get_th_data,
701   // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
702   OMPRTL__kmpc_alloc,
703   // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
704   OMPRTL__kmpc_free,
705 
706   //
707   // Offloading related calls
708   //
709   // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
710   // size);
711   OMPRTL__kmpc_push_target_tripcount,
712   // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
713   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
714   // *arg_types);
715   OMPRTL__tgt_target,
716   // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
717   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
718   // *arg_types);
719   OMPRTL__tgt_target_nowait,
720   // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
721   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
722   // *arg_types, int32_t num_teams, int32_t thread_limit);
723   OMPRTL__tgt_target_teams,
724   // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
725   // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
726   // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
727   OMPRTL__tgt_target_teams_nowait,
728   // Call to void __tgt_register_requires(int64_t flags);
729   OMPRTL__tgt_register_requires,
730   // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
731   OMPRTL__tgt_register_lib,
732   // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
733   OMPRTL__tgt_unregister_lib,
734   // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
735   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
736   OMPRTL__tgt_target_data_begin,
737   // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
738   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
739   // *arg_types);
740   OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
743   OMPRTL__tgt_target_data_end,
744   // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
745   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
746   // *arg_types);
747   OMPRTL__tgt_target_data_end_nowait,
748   // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
749   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
750   OMPRTL__tgt_target_data_update,
751   // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
752   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
753   // *arg_types);
754   OMPRTL__tgt_target_data_update_nowait,
755   // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
756   OMPRTL__tgt_mapper_num_components,
757   // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
758   // *base, void *begin, int64_t size, int64_t type);
759   OMPRTL__tgt_push_mapper_component,
760 };
761 
762 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
763 /// region.
764 class CleanupTy final : public EHScopeStack::Cleanup {
765   PrePostActionTy *Action;
766 
767 public:
768   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
769   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
770     if (!CGF.HaveInsertPoint())
771       return;
772     Action->Exit(CGF);
773   }
774 };
775 
776 } // anonymous namespace
777 
778 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
779   CodeGenFunction::RunCleanupsScope Scope(CGF);
780   if (PrePostAction) {
781     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
782     Callback(CodeGen, CGF, *PrePostAction);
783   } else {
784     PrePostActionTy Action;
785     Callback(CodeGen, CGF, Action);
786   }
787 }
788 
789 /// Check if the combiner is a call to UDR combiner and if it is so return the
790 /// UDR decl used for reduction.
791 static const OMPDeclareReductionDecl *
792 getReductionInit(const Expr *ReductionOp) {
793   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
794     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
795       if (const auto *DRE =
796               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
797         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
798           return DRD;
799   return nullptr;
800 }
801 
802 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
803                                              const OMPDeclareReductionDecl *DRD,
804                                              const Expr *InitOp,
805                                              Address Private, Address Original,
806                                              QualType Ty) {
807   if (DRD->getInitializer()) {
808     std::pair<llvm::Function *, llvm::Function *> Reduction =
809         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
810     const auto *CE = cast<CallExpr>(InitOp);
811     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
812     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
813     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
814     const auto *LHSDRE =
815         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
816     const auto *RHSDRE =
817         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
818     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
819     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
820                             [=]() { return Private; });
821     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
822                             [=]() { return Original; });
823     (void)PrivateScope.Privatize();
824     RValue Func = RValue::get(Reduction.second);
825     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
826     CGF.EmitIgnoredExpr(InitOp);
827   } else {
828     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
829     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
830     auto *GV = new llvm::GlobalVariable(
831         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
832         llvm::GlobalValue::PrivateLinkage, Init, Name);
833     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
834     RValue InitRVal;
835     switch (CGF.getEvaluationKind(Ty)) {
836     case TEK_Scalar:
837       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
838       break;
839     case TEK_Complex:
840       InitRVal =
841           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
842       break;
843     case TEK_Aggregate:
844       InitRVal = RValue::getAggregate(LV.getAddress());
845       break;
846     }
847     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
848     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
849     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
850                          /*IsInitializer=*/false);
851   }
852 }
853 
854 /// Emit initialization of arrays of complex types.
855 /// \param DestAddr Address of the array.
856 /// \param Type Type of array.
857 /// \param Init Initial expression of array.
858 /// \param SrcAddr Address of the original array.
859 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
860                                  QualType Type, bool EmitDeclareReductionInit,
861                                  const Expr *Init,
862                                  const OMPDeclareReductionDecl *DRD,
863                                  Address SrcAddr = Address::invalid()) {
864   // Perform element-by-element initialization.
865   QualType ElementTy;
866 
867   // Drill down to the base element type on both arrays.
868   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
869   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
870   DestAddr =
871       CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
872   if (DRD)
873     SrcAddr =
874         CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
875 
876   llvm::Value *SrcBegin = nullptr;
877   if (DRD)
878     SrcBegin = SrcAddr.getPointer();
879   llvm::Value *DestBegin = DestAddr.getPointer();
880   // Cast from pointer to array type to pointer to single element.
881   llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
882   // The basic structure here is a while-do loop.
883   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
884   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
885   llvm::Value *IsEmpty =
886       CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
887   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
888 
889   // Enter the loop body, making that address the current address.
890   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
891   CGF.EmitBlock(BodyBB);
892 
893   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
894 
895   llvm::PHINode *SrcElementPHI = nullptr;
896   Address SrcElementCurrent = Address::invalid();
897   if (DRD) {
898     SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
899                                           "omp.arraycpy.srcElementPast");
900     SrcElementPHI->addIncoming(SrcBegin, EntryBB);
901     SrcElementCurrent =
902         Address(SrcElementPHI,
903                 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
904   }
905   llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
906       DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
907   DestElementPHI->addIncoming(DestBegin, EntryBB);
908   Address DestElementCurrent =
909       Address(DestElementPHI,
910               DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
911 
912   // Emit copy.
913   {
914     CodeGenFunction::RunCleanupsScope InitScope(CGF);
915     if (EmitDeclareReductionInit) {
916       emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
917                                        SrcElementCurrent, ElementTy);
918     } else
919       CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
920                            /*IsInitializer=*/false);
921   }
922 
923   if (DRD) {
924     // Shift the address forward by one element.
925     llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
926         SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
927     SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
928   }
929 
930   // Shift the address forward by one element.
931   llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
932       DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
933   // Check whether we've reached the end.
934   llvm::Value *Done =
935       CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
936   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
937   DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
938 
939   // Done.
940   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
941 }
942 
943 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
944   return CGF.EmitOMPSharedLValue(E);
945 }
946 
947 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
948                                             const Expr *E) {
949   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
950     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
951   return LValue();
952 }
953 
954 void ReductionCodeGen::emitAggregateInitialization(
955     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
956     const OMPDeclareReductionDecl *DRD) {
957   // Emit VarDecl with copy init for arrays.
958   // Get the address of the original variable captured in current
959   // captured region.
960   const auto *PrivateVD =
961       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
962   bool EmitDeclareReductionInit =
963       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
964   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
965                        EmitDeclareReductionInit,
966                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
967                                                 : PrivateVD->getInit(),
968                        DRD, SharedLVal.getAddress());
969 }
970 
971 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
972                                    ArrayRef<const Expr *> Privates,
973                                    ArrayRef<const Expr *> ReductionOps) {
974   ClausesData.reserve(Shareds.size());
975   SharedAddresses.reserve(Shareds.size());
976   Sizes.reserve(Shareds.size());
977   BaseDecls.reserve(Shareds.size());
978   auto IPriv = Privates.begin();
979   auto IRed = ReductionOps.begin();
980   for (const Expr *Ref : Shareds) {
981     ClausesData.emplace_back(Ref, *IPriv, *IRed);
982     std::advance(IPriv, 1);
983     std::advance(IRed, 1);
984   }
985 }
986 
987 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
988   assert(SharedAddresses.size() == N &&
989          "Number of generated lvalues must be exactly N.");
990   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
991   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
992   SharedAddresses.emplace_back(First, Second);
993 }
994 
995 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
996   const auto *PrivateVD =
997       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
998   QualType PrivateType = PrivateVD->getType();
999   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
1000   if (!PrivateType->isVariablyModifiedType()) {
1001     Sizes.emplace_back(
1002         CGF.getTypeSize(
1003             SharedAddresses[N].first.getType().getNonReferenceType()),
1004         nullptr);
1005     return;
1006   }
1007   llvm::Value *Size;
1008   llvm::Value *SizeInChars;
1009   auto *ElemType =
1010       cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
1011           ->getElementType();
1012   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
1013   if (AsArraySection) {
1014     Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
1015                                      SharedAddresses[N].first.getPointer());
1016     Size = CGF.Builder.CreateNUWAdd(
1017         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
1018     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
1019   } else {
1020     SizeInChars = CGF.getTypeSize(
1021         SharedAddresses[N].first.getType().getNonReferenceType());
1022     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
1023   }
1024   Sizes.emplace_back(SizeInChars, Size);
1025   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1026       CGF,
1027       cast<OpaqueValueExpr>(
1028           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1029       RValue::get(Size));
1030   CGF.EmitVariablyModifiedType(PrivateType);
1031 }
1032 
1033 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
1034                                          llvm::Value *Size) {
1035   const auto *PrivateVD =
1036       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1037   QualType PrivateType = PrivateVD->getType();
1038   if (!PrivateType->isVariablyModifiedType()) {
1039     assert(!Size && !Sizes[N].second &&
1040            "Size should be nullptr for non-variably modified reduction "
1041            "items.");
1042     return;
1043   }
1044   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1045       CGF,
1046       cast<OpaqueValueExpr>(
1047           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1048       RValue::get(Size));
1049   CGF.EmitVariablyModifiedType(PrivateType);
1050 }
1051 
1052 void ReductionCodeGen::emitInitialization(
1053     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1054     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1055   assert(SharedAddresses.size() > N && "No variable was generated");
1056   const auto *PrivateVD =
1057       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1058   const OMPDeclareReductionDecl *DRD =
1059       getReductionInit(ClausesData[N].ReductionOp);
1060   QualType PrivateType = PrivateVD->getType();
1061   PrivateAddr = CGF.Builder.CreateElementBitCast(
1062       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1063   QualType SharedType = SharedAddresses[N].first.getType();
1064   SharedLVal = CGF.MakeAddrLValue(
1065       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
1066                                        CGF.ConvertTypeForMem(SharedType)),
1067       SharedType, SharedAddresses[N].first.getBaseInfo(),
1068       CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1069   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1070     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1071   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1072     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1073                                      PrivateAddr, SharedLVal.getAddress(),
1074                                      SharedLVal.getType());
1075   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1076              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1077     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1078                          PrivateVD->getType().getQualifiers(),
1079                          /*IsInitializer=*/false);
1080   }
1081 }
1082 
1083 bool ReductionCodeGen::needCleanups(unsigned N) {
1084   const auto *PrivateVD =
1085       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1086   QualType PrivateType = PrivateVD->getType();
1087   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1088   return DTorKind != QualType::DK_none;
1089 }
1090 
1091 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1092                                     Address PrivateAddr) {
1093   const auto *PrivateVD =
1094       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1095   QualType PrivateType = PrivateVD->getType();
1096   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1097   if (needCleanups(N)) {
1098     PrivateAddr = CGF.Builder.CreateElementBitCast(
1099         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1100     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1101   }
1102 }
1103 
1104 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1105                           LValue BaseLV) {
1106   BaseTy = BaseTy.getNonReferenceType();
1107   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1108          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1109     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
1110       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
1111     } else {
1112       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
1113       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1114     }
1115     BaseTy = BaseTy->getPointeeType();
1116   }
1117   return CGF.MakeAddrLValue(
1118       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
1119                                        CGF.ConvertTypeForMem(ElTy)),
1120       BaseLV.getType(), BaseLV.getBaseInfo(),
1121       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1122 }
1123 
1124 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1125                           llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
1126                           llvm::Value *Addr) {
1127   Address Tmp = Address::invalid();
1128   Address TopTmp = Address::invalid();
1129   Address MostTopTmp = Address::invalid();
1130   BaseTy = BaseTy.getNonReferenceType();
1131   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1132          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1133     Tmp = CGF.CreateMemTemp(BaseTy);
1134     if (TopTmp.isValid())
1135       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1136     else
1137       MostTopTmp = Tmp;
1138     TopTmp = Tmp;
1139     BaseTy = BaseTy->getPointeeType();
1140   }
1141   llvm::Type *Ty = BaseLVType;
1142   if (Tmp.isValid())
1143     Ty = Tmp.getElementType();
1144   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1145   if (Tmp.isValid()) {
1146     CGF.Builder.CreateStore(Addr, Tmp);
1147     return MostTopTmp;
1148   }
1149   return Address(Addr, BaseLVAlignment);
1150 }
1151 
1152 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1153   const VarDecl *OrigVD = nullptr;
1154   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1155     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1156     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1157       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1158     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1159       Base = TempASE->getBase()->IgnoreParenImpCasts();
1160     DE = cast<DeclRefExpr>(Base);
1161     OrigVD = cast<VarDecl>(DE->getDecl());
1162   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1163     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1164     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1165       Base = TempASE->getBase()->IgnoreParenImpCasts();
1166     DE = cast<DeclRefExpr>(Base);
1167     OrigVD = cast<VarDecl>(DE->getDecl());
1168   }
1169   return OrigVD;
1170 }
1171 
1172 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1173                                                Address PrivateAddr) {
1174   const DeclRefExpr *DE;
1175   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1176     BaseDecls.emplace_back(OrigVD);
1177     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1178     LValue BaseLValue =
1179         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1180                     OriginalBaseLValue);
1181     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1182         BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
1183     llvm::Value *PrivatePointer =
1184         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1185             PrivateAddr.getPointer(),
1186             SharedAddresses[N].first.getAddress().getType());
1187     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1188     return castToBase(CGF, OrigVD->getType(),
1189                       SharedAddresses[N].first.getType(),
1190                       OriginalBaseLValue.getAddress().getType(),
1191                       OriginalBaseLValue.getAlignment(), Ptr);
1192   }
1193   BaseDecls.emplace_back(
1194       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1195   return PrivateAddr;
1196 }
1197 
1198 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1199   const OMPDeclareReductionDecl *DRD =
1200       getReductionInit(ClausesData[N].ReductionOp);
1201   return DRD && DRD->getInitializer();
1202 }
1203 
1204 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1205   return CGF.EmitLoadOfPointerLValue(
1206       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1207       getThreadIDVariable()->getType()->castAs<PointerType>());
1208 }
1209 
1210 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1211   if (!CGF.HaveInsertPoint())
1212     return;
1213   // 1.2.2 OpenMP Language Terminology
1214   // Structured block - An executable statement with a single entry at the
1215   // top and a single exit at the bottom.
1216   // The point of exit cannot be a branch out of the structured block.
1217   // longjmp() and throw() must not violate the entry/exit criteria.
1218   CGF.EHStack.pushTerminate();
1219   CodeGen(CGF);
1220   CGF.EHStack.popTerminate();
1221 }
1222 
1223 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1224     CodeGenFunction &CGF) {
1225   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1226                             getThreadIDVariable()->getType(),
1227                             AlignmentSource::Decl);
1228 }
1229 
1230 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1231                                        QualType FieldTy) {
1232   auto *Field = FieldDecl::Create(
1233       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1234       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1235       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1236   Field->setAccess(AS_public);
1237   DC->addDecl(Field);
1238   return Field;
1239 }
1240 
1241 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1242                                  StringRef Separator)
1243     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1244       OffloadEntriesInfoManager(CGM) {
1245   ASTContext &C = CGM.getContext();
1246   RecordDecl *RD = C.buildImplicitRecord("ident_t");
1247   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1248   RD->startDefinition();
1249   // reserved_1
1250   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1251   // flags
1252   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1253   // reserved_2
1254   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1255   // reserved_3
1256   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1257   // psource
1258   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1259   RD->completeDefinition();
1260   IdentQTy = C.getRecordType(RD);
1261   IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1262   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1263 
1264   loadOffloadInfoMetadata();
1265 }
1266 
1267 void CGOpenMPRuntime::clear() {
1268   InternalVars.clear();
1269   // Clean non-target variable declarations possibly used only in debug info.
1270   for (const auto &Data : EmittedNonTargetVariables) {
1271     if (!Data.getValue().pointsToAliveValue())
1272       continue;
1273     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1274     if (!GV)
1275       continue;
1276     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1277       continue;
1278     GV->eraseFromParent();
1279   }
1280 }
1281 
1282 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1283   SmallString<128> Buffer;
1284   llvm::raw_svector_ostream OS(Buffer);
1285   StringRef Sep = FirstSeparator;
1286   for (StringRef Part : Parts) {
1287     OS << Sep << Part;
1288     Sep = Separator;
1289   }
1290   return OS.str();
1291 }
1292 
1293 static llvm::Function *
1294 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1295                           const Expr *CombinerInitializer, const VarDecl *In,
1296                           const VarDecl *Out, bool IsCombiner) {
1297   // void .omp_combiner.(Ty *in, Ty *out);
1298   ASTContext &C = CGM.getContext();
1299   QualType PtrTy = C.getPointerType(Ty).withRestrict();
1300   FunctionArgList Args;
1301   ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1302                                /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1303   ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1304                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1305   Args.push_back(&OmpOutParm);
1306   Args.push_back(&OmpInParm);
1307   const CGFunctionInfo &FnInfo =
1308       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1309   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1310   std::string Name = CGM.getOpenMPRuntime().getName(
1311       {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1312   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1313                                     Name, &CGM.getModule());
1314   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1315   if (CGM.getLangOpts().Optimize) {
1316     Fn->removeFnAttr(llvm::Attribute::NoInline);
1317     Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1318     Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1319   }
1320   CodeGenFunction CGF(CGM);
1321   // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1322   // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1323   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1324                     Out->getLocation());
1325   CodeGenFunction::OMPPrivateScope Scope(CGF);
1326   Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1327   Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
1328     return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1329         .getAddress();
1330   });
1331   Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1332   Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
1333     return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1334         .getAddress();
1335   });
1336   (void)Scope.Privatize();
1337   if (!IsCombiner && Out->hasInit() &&
1338       !CGF.isTrivialInitializer(Out->getInit())) {
1339     CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1340                          Out->getType().getQualifiers(),
1341                          /*IsInitializer=*/true);
1342   }
1343   if (CombinerInitializer)
1344     CGF.EmitIgnoredExpr(CombinerInitializer);
1345   Scope.ForceCleanup();
1346   CGF.FinishFunction();
1347   return Fn;
1348 }
1349 
1350 void CGOpenMPRuntime::emitUserDefinedReduction(
1351     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1352   if (UDRMap.count(D) > 0)
1353     return;
1354   llvm::Function *Combiner = emitCombinerOrInitializer(
1355       CGM, D->getType(), D->getCombiner(),
1356       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1357       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1358       /*IsCombiner=*/true);
1359   llvm::Function *Initializer = nullptr;
1360   if (const Expr *Init = D->getInitializer()) {
1361     Initializer = emitCombinerOrInitializer(
1362         CGM, D->getType(),
1363         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1364                                                                      : nullptr,
1365         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1366         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1367         /*IsCombiner=*/false);
1368   }
1369   UDRMap.try_emplace(D, Combiner, Initializer);
1370   if (CGF) {
1371     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1372     Decls.second.push_back(D);
1373   }
1374 }
1375 
1376 std::pair<llvm::Function *, llvm::Function *>
1377 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1378   auto I = UDRMap.find(D);
1379   if (I != UDRMap.end())
1380     return I->second;
1381   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1382   return UDRMap.lookup(D);
1383 }
1384 
1385 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1386     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1387     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1388     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1389   assert(ThreadIDVar->getType()->isPointerType() &&
1390          "thread id variable must be of type kmp_int32 *");
1391   CodeGenFunction CGF(CGM, true);
1392   bool HasCancel = false;
1393   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1394     HasCancel = OPD->hasCancel();
1395   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1396     HasCancel = OPSD->hasCancel();
1397   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1398     HasCancel = OPFD->hasCancel();
1399   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1400     HasCancel = OPFD->hasCancel();
1401   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1402     HasCancel = OPFD->hasCancel();
1403   else if (const auto *OPFD =
1404                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1405     HasCancel = OPFD->hasCancel();
1406   else if (const auto *OPFD =
1407                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1408     HasCancel = OPFD->hasCancel();
1409   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1410                                     HasCancel, OutlinedHelperName);
1411   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1412   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1413 }
1414 
1415 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1416     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1417     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1418   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1419   return emitParallelOrTeamsOutlinedFunction(
1420       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1421 }
1422 
1423 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1424     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1425     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1426   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1427   return emitParallelOrTeamsOutlinedFunction(
1428       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1429 }
1430 
1431 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1432     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1433     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1434     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1435     bool Tied, unsigned &NumberOfParts) {
1436   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1437                                               PrePostActionTy &) {
1438     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1439     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1440     llvm::Value *TaskArgs[] = {
1441         UpLoc, ThreadID,
1442         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1443                                     TaskTVar->getType()->castAs<PointerType>())
1444             .getPointer()};
1445     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1446   };
1447   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1448                                                             UntiedCodeGen);
1449   CodeGen.setAction(Action);
1450   assert(!ThreadIDVar->getType()->isPointerType() &&
1451          "thread id variable must be of type kmp_int32 for tasks");
1452   const OpenMPDirectiveKind Region =
1453       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1454                                                       : OMPD_task;
1455   const CapturedStmt *CS = D.getCapturedStmt(Region);
1456   const auto *TD = dyn_cast<OMPTaskDirective>(&D);
1457   CodeGenFunction CGF(CGM, true);
1458   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1459                                         InnermostKind,
1460                                         TD ? TD->hasCancel() : false, Action);
1461   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1462   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1463   if (!Tied)
1464     NumberOfParts = Action.getNumberOfParts();
1465   return Res;
1466 }
1467 
1468 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1469                              const RecordDecl *RD, const CGRecordLayout &RL,
1470                              ArrayRef<llvm::Constant *> Data) {
1471   llvm::StructType *StructTy = RL.getLLVMType();
1472   unsigned PrevIdx = 0;
1473   ConstantInitBuilder CIBuilder(CGM);
1474   auto DI = Data.begin();
1475   for (const FieldDecl *FD : RD->fields()) {
1476     unsigned Idx = RL.getLLVMFieldNo(FD);
1477     // Fill the alignment.
1478     for (unsigned I = PrevIdx; I < Idx; ++I)
1479       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1480     PrevIdx = Idx + 1;
1481     Fields.add(*DI);
1482     ++DI;
1483   }
1484 }
1485 
1486 template <class... As>
1487 static llvm::GlobalVariable *
1488 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1489                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1490                    As &&... Args) {
1491   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1492   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1493   ConstantInitBuilder CIBuilder(CGM);
1494   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1495   buildStructValue(Fields, CGM, RD, RL, Data);
1496   return Fields.finishAndCreateGlobal(
1497       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1498       std::forward<As>(Args)...);
1499 }
1500 
1501 template <typename T>
1502 static void
1503 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1504                                          ArrayRef<llvm::Constant *> Data,
1505                                          T &Parent) {
1506   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1507   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1508   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1509   buildStructValue(Fields, CGM, RD, RL, Data);
1510   Fields.finishAndAddTo(Parent);
1511 }
1512 
1513 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1514   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1515   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1516   FlagsTy FlagsKey(Flags, Reserved2Flags);
1517   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1518   if (!Entry) {
1519     if (!DefaultOpenMPPSource) {
1520       // Initialize default location for psource field of ident_t structure of
1521       // all ident_t objects. Format is ";file;function;line;column;;".
1522       // Taken from
1523       // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1524       DefaultOpenMPPSource =
1525           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1526       DefaultOpenMPPSource =
1527           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1528     }
1529 
1530     llvm::Constant *Data[] = {
1531         llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1532         llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1533         llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1534         llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1535     llvm::GlobalValue *DefaultOpenMPLocation =
1536         createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1537                            llvm::GlobalValue::PrivateLinkage);
1538     DefaultOpenMPLocation->setUnnamedAddr(
1539         llvm::GlobalValue::UnnamedAddr::Global);
1540 
1541     OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1542   }
1543   return Address(Entry, Align);
1544 }
1545 
1546 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1547                                              bool AtCurrentPoint) {
1548   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1549   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1550 
1551   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1552   if (AtCurrentPoint) {
1553     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1554         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1555   } else {
1556     Elem.second.ServiceInsertPt =
1557         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1558     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1559   }
1560 }
1561 
1562 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1563   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1564   if (Elem.second.ServiceInsertPt) {
1565     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1566     Elem.second.ServiceInsertPt = nullptr;
1567     Ptr->eraseFromParent();
1568   }
1569 }
1570 
/// Returns a pointer to an ident_t object describing \p Loc for use in the
/// current function of \p CGF.
///
/// OMP_IDENT_KMPC is always OR-ed into \p Flags. When debug info is disabled
/// or \p Loc is invalid, the shared default location global for the flags is
/// returned. Otherwise a per-function ident_t temporary is used: it is
/// created on first use (initialized by a copy of the default location) and
/// its psource field is overwritten with a string describing \p Loc.
///
/// \returns The raw pointer to the ident_t object.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  // Reuse the per-function ident_t temporary if one was recorded already.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // getThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the temporary from the default location. The copy is emitted
    // at the function's service insert point (created on demand), not at the
    // builder's current position; IPG restores the builder's position when
    // this scope ends.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // The location string is cached per raw source-location encoding, so each
  // distinct Loc gets its string built only once.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    // The function name part stays empty when the current declaration is not
    // a FunctionDecl (or there is none).
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1631 
/// Returns a value holding the OpenMP global thread id for use in the current
/// function of \p CGF.
///
/// Sources are tried in this order: a value already cached for the function
/// in OpenMPLocThreadIDMap; a load from the thread id variable of the
/// enclosing OpenMP region info (when the guarding condition below allows
/// it); and finally a call to __kmpc_global_thread_num emitted at the
/// function's service insert point, which is then cached.
///
/// \param Loc Source location used for the load and for the location argument
/// of the runtime call.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  // The parameter is still used when no landing pad is required, when either
  // exception language option is off, or when emitting into the block that
  // contains AllocaInsertPt.
  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
      !CGF.getLangOpts().CXXExceptions ||
      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    if (auto *OMPRegionInfo =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
      if (OMPRegionInfo->getThreadIDVariable()) {
        // Check if this an outlined function with thread id passed as argument.
        LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insert point; IPG restores the builder's
  // previous position when this function returns.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1682 
1683 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1684   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1685   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1686     clearLocThreadIdInsertPt(CGF);
1687     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1688   }
1689   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1690     for(auto *D : FunctionUDRMap[CGF.CurFn])
1691       UDRMap.erase(D);
1692     FunctionUDRMap.erase(CGF.CurFn);
1693   }
1694   auto I = FunctionUDMMap.find(CGF.CurFn);
1695   if (I != FunctionUDMMap.end()) {
1696     for(auto *D : I->second)
1697       UDMMap.erase(D);
1698     FunctionUDMMap.erase(I);
1699   }
1700 }
1701 
1702 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1703   return IdentTy->getPointerTo();
1704 }
1705 
1706 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1707   if (!Kmpc_MicroTy) {
1708     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1709     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1710                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1711     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1712   }
1713   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1714 }
1715 
1716 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1717   llvm::FunctionCallee RTLFn = nullptr;
1718   switch (static_cast<OpenMPRTLFunction>(Function)) {
1719   case OMPRTL__kmpc_fork_call: {
1720     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1721     // microtask, ...);
1722     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1723                                 getKmpc_MicroPointerTy()};
1724     auto *FnTy =
1725         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1726     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1727     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1728       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1729         llvm::LLVMContext &Ctx = F->getContext();
1730         llvm::MDBuilder MDB(Ctx);
1731         // Annotate the callback behavior of the __kmpc_fork_call:
1732         //  - The callback callee is argument number 2 (microtask).
1733         //  - The first two arguments of the callback callee are unknown (-1).
1734         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1735         //    callback callee.
1736         F->addMetadata(
1737             llvm::LLVMContext::MD_callback,
1738             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1739                                         2, {-1, -1},
1740                                         /* VarArgsArePassed */ true)}));
1741       }
1742     }
1743     break;
1744   }
1745   case OMPRTL__kmpc_global_thread_num: {
1746     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1747     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1748     auto *FnTy =
1749         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1750     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1751     break;
1752   }
1753   case OMPRTL__kmpc_threadprivate_cached: {
1754     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1755     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1756     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1757                                 CGM.VoidPtrTy, CGM.SizeTy,
1758                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1759     auto *FnTy =
1760         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1761     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1762     break;
1763   }
1764   case OMPRTL__kmpc_critical: {
1765     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1766     // kmp_critical_name *crit);
1767     llvm::Type *TypeParams[] = {
1768         getIdentTyPointerTy(), CGM.Int32Ty,
1769         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1770     auto *FnTy =
1771         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1772     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1773     break;
1774   }
1775   case OMPRTL__kmpc_critical_with_hint: {
1776     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1777     // kmp_critical_name *crit, uintptr_t hint);
1778     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1779                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1780                                 CGM.IntPtrTy};
1781     auto *FnTy =
1782         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1783     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1784     break;
1785   }
1786   case OMPRTL__kmpc_threadprivate_register: {
1787     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1788     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1789     // typedef void *(*kmpc_ctor)(void *);
1790     auto *KmpcCtorTy =
1791         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1792                                 /*isVarArg*/ false)->getPointerTo();
1793     // typedef void *(*kmpc_cctor)(void *, void *);
1794     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1795     auto *KmpcCopyCtorTy =
1796         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1797                                 /*isVarArg*/ false)
1798             ->getPointerTo();
1799     // typedef void (*kmpc_dtor)(void *);
1800     auto *KmpcDtorTy =
1801         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1802             ->getPointerTo();
1803     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1804                               KmpcCopyCtorTy, KmpcDtorTy};
1805     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1806                                         /*isVarArg*/ false);
1807     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1808     break;
1809   }
1810   case OMPRTL__kmpc_end_critical: {
1811     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1812     // kmp_critical_name *crit);
1813     llvm::Type *TypeParams[] = {
1814         getIdentTyPointerTy(), CGM.Int32Ty,
1815         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1816     auto *FnTy =
1817         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1818     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1819     break;
1820   }
1821   case OMPRTL__kmpc_cancel_barrier: {
1822     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1823     // global_tid);
1824     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1825     auto *FnTy =
1826         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1827     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1828     break;
1829   }
1830   case OMPRTL__kmpc_barrier: {
1831     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1832     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1833     auto *FnTy =
1834         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1835     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1836     break;
1837   }
1838   case OMPRTL__kmpc_for_static_fini: {
1839     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1840     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1841     auto *FnTy =
1842         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1843     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1844     break;
1845   }
1846   case OMPRTL__kmpc_push_num_threads: {
1847     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1848     // kmp_int32 num_threads)
1849     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1850                                 CGM.Int32Ty};
1851     auto *FnTy =
1852         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1853     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1854     break;
1855   }
1856   case OMPRTL__kmpc_serialized_parallel: {
1857     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1858     // global_tid);
1859     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1860     auto *FnTy =
1861         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1862     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1863     break;
1864   }
1865   case OMPRTL__kmpc_end_serialized_parallel: {
1866     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1867     // global_tid);
1868     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1869     auto *FnTy =
1870         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1871     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1872     break;
1873   }
1874   case OMPRTL__kmpc_flush: {
1875     // Build void __kmpc_flush(ident_t *loc);
1876     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1877     auto *FnTy =
1878         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1879     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1880     break;
1881   }
1882   case OMPRTL__kmpc_master: {
1883     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1884     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1885     auto *FnTy =
1886         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1887     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1888     break;
1889   }
1890   case OMPRTL__kmpc_end_master: {
1891     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1892     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1893     auto *FnTy =
1894         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1895     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1896     break;
1897   }
1898   case OMPRTL__kmpc_omp_taskyield: {
1899     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1900     // int end_part);
1901     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1902     auto *FnTy =
1903         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1904     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1905     break;
1906   }
1907   case OMPRTL__kmpc_single: {
1908     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1909     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1910     auto *FnTy =
1911         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1912     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1913     break;
1914   }
1915   case OMPRTL__kmpc_end_single: {
1916     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1917     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1918     auto *FnTy =
1919         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1920     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1921     break;
1922   }
1923   case OMPRTL__kmpc_omp_task_alloc: {
1924     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1925     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1926     // kmp_routine_entry_t *task_entry);
1927     assert(KmpRoutineEntryPtrTy != nullptr &&
1928            "Type kmp_routine_entry_t must be created.");
1929     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1930                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1931     // Return void * and then cast to particular kmp_task_t type.
1932     auto *FnTy =
1933         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1934     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1935     break;
1936   }
1937   case OMPRTL__kmpc_omp_target_task_alloc: {
1938     // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
1939     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1940     // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
1941     assert(KmpRoutineEntryPtrTy != nullptr &&
1942            "Type kmp_routine_entry_t must be created.");
1943     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1944                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
1945                                 CGM.Int64Ty};
1946     // Return void * and then cast to particular kmp_task_t type.
1947     auto *FnTy =
1948         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1949     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
1950     break;
1951   }
1952   case OMPRTL__kmpc_omp_task: {
1953     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1954     // *new_task);
1955     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1956                                 CGM.VoidPtrTy};
1957     auto *FnTy =
1958         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1959     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1960     break;
1961   }
1962   case OMPRTL__kmpc_copyprivate: {
1963     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1964     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1965     // kmp_int32 didit);
1966     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1967     auto *CpyFnTy =
1968         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1969     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1970                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1971                                 CGM.Int32Ty};
1972     auto *FnTy =
1973         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1974     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1975     break;
1976   }
1977   case OMPRTL__kmpc_reduce: {
1978     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1979     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1980     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1981     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1982     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1983                                                /*isVarArg=*/false);
1984     llvm::Type *TypeParams[] = {
1985         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1986         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1987         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1988     auto *FnTy =
1989         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1990     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1991     break;
1992   }
1993   case OMPRTL__kmpc_reduce_nowait: {
1994     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1995     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1996     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1997     // *lck);
1998     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1999     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2000                                                /*isVarArg=*/false);
2001     llvm::Type *TypeParams[] = {
2002         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2003         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2004         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2005     auto *FnTy =
2006         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2007     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
2008     break;
2009   }
2010   case OMPRTL__kmpc_end_reduce: {
2011     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2012     // kmp_critical_name *lck);
2013     llvm::Type *TypeParams[] = {
2014         getIdentTyPointerTy(), CGM.Int32Ty,
2015         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2016     auto *FnTy =
2017         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2018     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2019     break;
2020   }
2021   case OMPRTL__kmpc_end_reduce_nowait: {
    // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
2024     llvm::Type *TypeParams[] = {
2025         getIdentTyPointerTy(), CGM.Int32Ty,
2026         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2027     auto *FnTy =
2028         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2029     RTLFn =
2030         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2031     break;
2032   }
2033   case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
2036     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2037                                 CGM.VoidPtrTy};
2038     auto *FnTy =
2039         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2040     RTLFn =
2041         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2042     break;
2043   }
2044   case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
2047     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2048                                 CGM.VoidPtrTy};
2049     auto *FnTy =
2050         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2051     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2052                                       /*Name=*/"__kmpc_omp_task_complete_if0");
2053     break;
2054   }
2055   case OMPRTL__kmpc_ordered: {
2056     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2057     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2058     auto *FnTy =
2059         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2060     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2061     break;
2062   }
2063   case OMPRTL__kmpc_end_ordered: {
2064     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2065     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2066     auto *FnTy =
2067         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2068     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2069     break;
2070   }
2071   case OMPRTL__kmpc_omp_taskwait: {
2072     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2073     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2074     auto *FnTy =
2075         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2076     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2077     break;
2078   }
2079   case OMPRTL__kmpc_taskgroup: {
2080     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2081     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2082     auto *FnTy =
2083         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2084     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2085     break;
2086   }
2087   case OMPRTL__kmpc_end_taskgroup: {
2088     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2089     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2090     auto *FnTy =
2091         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2092     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2093     break;
2094   }
2095   case OMPRTL__kmpc_push_proc_bind: {
2096     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2097     // int proc_bind)
2098     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2099     auto *FnTy =
2100         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2101     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2102     break;
2103   }
2104   case OMPRTL__kmpc_omp_task_with_deps: {
2105     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2106     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2107     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2108     llvm::Type *TypeParams[] = {
2109         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2110         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2111     auto *FnTy =
2112         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2113     RTLFn =
2114         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2115     break;
2116   }
2117   case OMPRTL__kmpc_omp_wait_deps: {
2118     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2119     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2120     // kmp_depend_info_t *noalias_dep_list);
2121     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2122                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2123                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2124     auto *FnTy =
2125         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2126     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2127     break;
2128   }
2129   case OMPRTL__kmpc_cancellationpoint: {
2130     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2131     // global_tid, kmp_int32 cncl_kind)
2132     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2133     auto *FnTy =
2134         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2135     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2136     break;
2137   }
2138   case OMPRTL__kmpc_cancel: {
2139     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2140     // kmp_int32 cncl_kind)
2141     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2142     auto *FnTy =
2143         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2144     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2145     break;
2146   }
2147   case OMPRTL__kmpc_push_num_teams: {
2148     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2149     // kmp_int32 num_teams, kmp_int32 num_threads)
2150     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2151         CGM.Int32Ty};
2152     auto *FnTy =
2153         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2154     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2155     break;
2156   }
2157   case OMPRTL__kmpc_fork_teams: {
2158     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2159     // microtask, ...);
2160     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2161                                 getKmpc_MicroPointerTy()};
2162     auto *FnTy =
2163         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2164     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2165     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2166       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2167         llvm::LLVMContext &Ctx = F->getContext();
2168         llvm::MDBuilder MDB(Ctx);
2169         // Annotate the callback behavior of the __kmpc_fork_teams:
2170         //  - The callback callee is argument number 2 (microtask).
2171         //  - The first two arguments of the callback callee are unknown (-1).
2172         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2173         //    callback callee.
2174         F->addMetadata(
2175             llvm::LLVMContext::MD_callback,
2176             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2177                                         2, {-1, -1},
2178                                         /* VarArgsArePassed */ true)}));
2179       }
2180     }
2181     break;
2182   }
2183   case OMPRTL__kmpc_taskloop: {
2184     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2185     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2186     // sched, kmp_uint64 grainsize, void *task_dup);
2187     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2188                                 CGM.IntTy,
2189                                 CGM.VoidPtrTy,
2190                                 CGM.IntTy,
2191                                 CGM.Int64Ty->getPointerTo(),
2192                                 CGM.Int64Ty->getPointerTo(),
2193                                 CGM.Int64Ty,
2194                                 CGM.IntTy,
2195                                 CGM.IntTy,
2196                                 CGM.Int64Ty,
2197                                 CGM.VoidPtrTy};
2198     auto *FnTy =
2199         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2200     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2201     break;
2202   }
2203   case OMPRTL__kmpc_doacross_init: {
2204     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2205     // num_dims, struct kmp_dim *dims);
2206     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2207                                 CGM.Int32Ty,
2208                                 CGM.Int32Ty,
2209                                 CGM.VoidPtrTy};
2210     auto *FnTy =
2211         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2212     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2213     break;
2214   }
2215   case OMPRTL__kmpc_doacross_fini: {
2216     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2217     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2218     auto *FnTy =
2219         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2220     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2221     break;
2222   }
2223   case OMPRTL__kmpc_doacross_post: {
2224     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2225     // *vec);
2226     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2227                                 CGM.Int64Ty->getPointerTo()};
2228     auto *FnTy =
2229         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2230     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2231     break;
2232   }
2233   case OMPRTL__kmpc_doacross_wait: {
2234     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2235     // *vec);
2236     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2237                                 CGM.Int64Ty->getPointerTo()};
2238     auto *FnTy =
2239         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2240     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2241     break;
2242   }
2243   case OMPRTL__kmpc_task_reduction_init: {
2244     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2245     // *data);
2246     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2247     auto *FnTy =
2248         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2249     RTLFn =
2250         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2251     break;
2252   }
2253   case OMPRTL__kmpc_task_reduction_get_th_data: {
2254     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2255     // *d);
2256     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2257     auto *FnTy =
2258         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2259     RTLFn = CGM.CreateRuntimeFunction(
2260         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2261     break;
2262   }
2263   case OMPRTL__kmpc_alloc: {
2264     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2265     // al); omp_allocator_handle_t type is void *.
2266     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2267     auto *FnTy =
2268         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2269     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2270     break;
2271   }
2272   case OMPRTL__kmpc_free: {
2273     // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2274     // al); omp_allocator_handle_t type is void *.
2275     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2276     auto *FnTy =
2277         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2278     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2279     break;
2280   }
2281   case OMPRTL__kmpc_push_target_tripcount: {
2282     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2283     // size);
2284     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2285     llvm::FunctionType *FnTy =
2286         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2287     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2288     break;
2289   }
2290   case OMPRTL__tgt_target: {
2291     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2292     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2293     // *arg_types);
2294     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2295                                 CGM.VoidPtrTy,
2296                                 CGM.Int32Ty,
2297                                 CGM.VoidPtrPtrTy,
2298                                 CGM.VoidPtrPtrTy,
2299                                 CGM.Int64Ty->getPointerTo(),
2300                                 CGM.Int64Ty->getPointerTo()};
2301     auto *FnTy =
2302         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2303     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2304     break;
2305   }
2306   case OMPRTL__tgt_target_nowait: {
2307     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2308     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2309     // int64_t *arg_types);
2310     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2311                                 CGM.VoidPtrTy,
2312                                 CGM.Int32Ty,
2313                                 CGM.VoidPtrPtrTy,
2314                                 CGM.VoidPtrPtrTy,
2315                                 CGM.Int64Ty->getPointerTo(),
2316                                 CGM.Int64Ty->getPointerTo()};
2317     auto *FnTy =
2318         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2319     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2320     break;
2321   }
2322   case OMPRTL__tgt_target_teams: {
2323     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2324     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2325     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2326     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2327                                 CGM.VoidPtrTy,
2328                                 CGM.Int32Ty,
2329                                 CGM.VoidPtrPtrTy,
2330                                 CGM.VoidPtrPtrTy,
2331                                 CGM.Int64Ty->getPointerTo(),
2332                                 CGM.Int64Ty->getPointerTo(),
2333                                 CGM.Int32Ty,
2334                                 CGM.Int32Ty};
2335     auto *FnTy =
2336         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2337     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2338     break;
2339   }
2340   case OMPRTL__tgt_target_teams_nowait: {
2341     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2342     // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2343     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2344     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2345                                 CGM.VoidPtrTy,
2346                                 CGM.Int32Ty,
2347                                 CGM.VoidPtrPtrTy,
2348                                 CGM.VoidPtrPtrTy,
2349                                 CGM.Int64Ty->getPointerTo(),
2350                                 CGM.Int64Ty->getPointerTo(),
2351                                 CGM.Int32Ty,
2352                                 CGM.Int32Ty};
2353     auto *FnTy =
2354         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2355     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2356     break;
2357   }
2358   case OMPRTL__tgt_register_requires: {
2359     // Build void __tgt_register_requires(int64_t flags);
2360     llvm::Type *TypeParams[] = {CGM.Int64Ty};
2361     auto *FnTy =
2362         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2363     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2364     break;
2365   }
2366   case OMPRTL__tgt_register_lib: {
2367     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2368     QualType ParamTy =
2369         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2370     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2371     auto *FnTy =
2372         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2373     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2374     break;
2375   }
2376   case OMPRTL__tgt_unregister_lib: {
2377     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2378     QualType ParamTy =
2379         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2380     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2381     auto *FnTy =
2382         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2383     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2384     break;
2385   }
2386   case OMPRTL__tgt_target_data_begin: {
2387     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2388     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2389     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2390                                 CGM.Int32Ty,
2391                                 CGM.VoidPtrPtrTy,
2392                                 CGM.VoidPtrPtrTy,
2393                                 CGM.Int64Ty->getPointerTo(),
2394                                 CGM.Int64Ty->getPointerTo()};
2395     auto *FnTy =
2396         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2397     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2398     break;
2399   }
2400   case OMPRTL__tgt_target_data_begin_nowait: {
2401     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2402     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2403     // *arg_types);
2404     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2405                                 CGM.Int32Ty,
2406                                 CGM.VoidPtrPtrTy,
2407                                 CGM.VoidPtrPtrTy,
2408                                 CGM.Int64Ty->getPointerTo(),
2409                                 CGM.Int64Ty->getPointerTo()};
2410     auto *FnTy =
2411         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2412     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2413     break;
2414   }
2415   case OMPRTL__tgt_target_data_end: {
2416     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2417     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2418     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2419                                 CGM.Int32Ty,
2420                                 CGM.VoidPtrPtrTy,
2421                                 CGM.VoidPtrPtrTy,
2422                                 CGM.Int64Ty->getPointerTo(),
2423                                 CGM.Int64Ty->getPointerTo()};
2424     auto *FnTy =
2425         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2426     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2427     break;
2428   }
2429   case OMPRTL__tgt_target_data_end_nowait: {
2430     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2431     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2432     // *arg_types);
2433     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2434                                 CGM.Int32Ty,
2435                                 CGM.VoidPtrPtrTy,
2436                                 CGM.VoidPtrPtrTy,
2437                                 CGM.Int64Ty->getPointerTo(),
2438                                 CGM.Int64Ty->getPointerTo()};
2439     auto *FnTy =
2440         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2441     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2442     break;
2443   }
2444   case OMPRTL__tgt_target_data_update: {
2445     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2446     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2447     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2448                                 CGM.Int32Ty,
2449                                 CGM.VoidPtrPtrTy,
2450                                 CGM.VoidPtrPtrTy,
2451                                 CGM.Int64Ty->getPointerTo(),
2452                                 CGM.Int64Ty->getPointerTo()};
2453     auto *FnTy =
2454         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2455     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2456     break;
2457   }
2458   case OMPRTL__tgt_target_data_update_nowait: {
2459     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2460     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2461     // *arg_types);
2462     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2463                                 CGM.Int32Ty,
2464                                 CGM.VoidPtrPtrTy,
2465                                 CGM.VoidPtrPtrTy,
2466                                 CGM.Int64Ty->getPointerTo(),
2467                                 CGM.Int64Ty->getPointerTo()};
2468     auto *FnTy =
2469         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2470     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2471     break;
2472   }
2473   case OMPRTL__tgt_mapper_num_components: {
2474     // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
2475     llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
2476     auto *FnTy =
2477         llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
2478     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
2479     break;
2480   }
2481   case OMPRTL__tgt_push_mapper_component: {
2482     // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
2483     // *base, void *begin, int64_t size, int64_t type);
2484     llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
2485                                 CGM.Int64Ty, CGM.Int64Ty};
2486     auto *FnTy =
2487         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2488     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
2489     break;
2490   }
2491   }
2492   assert(RTLFn && "Unable to find OpenMP runtime function");
2493   return RTLFn;
2494 }
2495 
2496 llvm::FunctionCallee
2497 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2498   assert((IVSize == 32 || IVSize == 64) &&
2499          "IV size is not compatible with the omp runtime");
2500   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2501                                             : "__kmpc_for_static_init_4u")
2502                                 : (IVSigned ? "__kmpc_for_static_init_8"
2503                                             : "__kmpc_for_static_init_8u");
2504   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2505   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2506   llvm::Type *TypeParams[] = {
2507     getIdentTyPointerTy(),                     // loc
2508     CGM.Int32Ty,                               // tid
2509     CGM.Int32Ty,                               // schedtype
2510     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2511     PtrTy,                                     // p_lower
2512     PtrTy,                                     // p_upper
2513     PtrTy,                                     // p_stride
2514     ITy,                                       // incr
2515     ITy                                        // chunk
2516   };
2517   auto *FnTy =
2518       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2519   return CGM.CreateRuntimeFunction(FnTy, Name);
2520 }
2521 
2522 llvm::FunctionCallee
2523 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2524   assert((IVSize == 32 || IVSize == 64) &&
2525          "IV size is not compatible with the omp runtime");
2526   StringRef Name =
2527       IVSize == 32
2528           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2529           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2530   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2531   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2532                                CGM.Int32Ty,           // tid
2533                                CGM.Int32Ty,           // schedtype
2534                                ITy,                   // lower
2535                                ITy,                   // upper
2536                                ITy,                   // stride
2537                                ITy                    // chunk
2538   };
2539   auto *FnTy =
2540       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2541   return CGM.CreateRuntimeFunction(FnTy, Name);
2542 }
2543 
2544 llvm::FunctionCallee
2545 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2546   assert((IVSize == 32 || IVSize == 64) &&
2547          "IV size is not compatible with the omp runtime");
2548   StringRef Name =
2549       IVSize == 32
2550           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2551           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2552   llvm::Type *TypeParams[] = {
2553       getIdentTyPointerTy(), // loc
2554       CGM.Int32Ty,           // tid
2555   };
2556   auto *FnTy =
2557       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2558   return CGM.CreateRuntimeFunction(FnTy, Name);
2559 }
2560 
2561 llvm::FunctionCallee
2562 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2563   assert((IVSize == 32 || IVSize == 64) &&
2564          "IV size is not compatible with the omp runtime");
2565   StringRef Name =
2566       IVSize == 32
2567           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2568           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2569   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2570   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2571   llvm::Type *TypeParams[] = {
2572     getIdentTyPointerTy(),                     // loc
2573     CGM.Int32Ty,                               // tid
2574     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2575     PtrTy,                                     // p_lower
2576     PtrTy,                                     // p_upper
2577     PtrTy                                      // p_stride
2578   };
2579   auto *FnTy =
2580       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2581   return CGM.CreateRuntimeFunction(FnTy, Name);
2582 }
2583 
2584 /// Obtain information that uniquely identifies a target entry. This
2585 /// consists of the file and device IDs as well as line number associated with
2586 /// the relevant entry source location.
2587 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2588                                      unsigned &DeviceID, unsigned &FileID,
2589                                      unsigned &LineNum) {
2590   SourceManager &SM = C.getSourceManager();
2591 
2592   // The loc should be always valid and have a file ID (the user cannot use
2593   // #pragma directives in macros)
2594 
2595   assert(Loc.isValid() && "Source location is expected to be always valid.");
2596 
2597   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2598   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2599 
2600   llvm::sys::fs::UniqueID ID;
2601   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2602     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2603         << PLoc.getFilename() << EC.message();
2604 
2605   DeviceID = ID.getDevice();
2606   FileID = ID.getFile();
2607   LineNum = PLoc.getLine();
2608 }
2609 
2610 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
2611   if (CGM.getLangOpts().OpenMPSimd)
2612     return Address::invalid();
2613   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2614       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2615   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
2616               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2617                HasRequiresUnifiedSharedMemory))) {
2618     SmallString<64> PtrName;
2619     {
2620       llvm::raw_svector_ostream OS(PtrName);
2621       OS << CGM.getMangledName(GlobalDecl(VD));
2622       if (!VD->isExternallyVisible()) {
2623         unsigned DeviceID, FileID, Line;
2624         getTargetEntryUniqueInfo(CGM.getContext(),
2625                                  VD->getCanonicalDecl()->getBeginLoc(),
2626                                  DeviceID, FileID, Line);
2627         OS << llvm::format("_%x", FileID);
2628       }
2629       OS << "_decl_tgt_ref_ptr";
2630     }
2631     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2632     if (!Ptr) {
2633       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2634       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2635                                         PtrName);
2636 
2637       auto *GV = cast<llvm::GlobalVariable>(Ptr);
2638       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
2639 
2640       if (!CGM.getLangOpts().OpenMPIsDevice)
2641         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2642       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2643     }
2644     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2645   }
2646   return Address::invalid();
2647 }
2648 
2649 llvm::Constant *
2650 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2651   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2652          !CGM.getContext().getTargetInfo().isTLSSupported());
2653   // Lookup the entry, lazily creating it if necessary.
2654   std::string Suffix = getName({"cache", ""});
2655   return getOrCreateInternalVariable(
2656       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2657 }
2658 
2659 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2660                                                 const VarDecl *VD,
2661                                                 Address VDAddr,
2662                                                 SourceLocation Loc) {
2663   if (CGM.getLangOpts().OpenMPUseTLS &&
2664       CGM.getContext().getTargetInfo().isTLSSupported())
2665     return VDAddr;
2666 
2667   llvm::Type *VarTy = VDAddr.getElementType();
2668   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2669                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2670                                                        CGM.Int8PtrTy),
2671                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2672                          getOrCreateThreadPrivateCache(VD)};
2673   return Address(CGF.EmitRuntimeCall(
2674       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2675                  VDAddr.getAlignment());
2676 }
2677 
2678 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2679     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2680     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2681   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2682   // library.
2683   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2684   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2685                       OMPLoc);
2686   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2687   // to register constructor/destructor for variable.
2688   llvm::Value *Args[] = {
2689       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2690       Ctor, CopyCtor, Dtor};
2691   CGF.EmitRuntimeCall(
2692       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2693 }
2694 
2695 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
2696     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2697     bool PerformInit, CodeGenFunction *CGF) {
2698   if (CGM.getLangOpts().OpenMPUseTLS &&
2699       CGM.getContext().getTargetInfo().isTLSSupported())
2700     return nullptr;
2701 
2702   VD = VD->getDefinition(CGM.getContext());
2703   if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
2704     QualType ASTTy = VD->getType();
2705 
2706     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2707     const Expr *Init = VD->getAnyInitializer();
2708     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2709       // Generate function that re-emits the declaration's initializer into the
2710       // threadprivate copy of the variable VD
2711       CodeGenFunction CtorCGF(CGM);
2712       FunctionArgList Args;
2713       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2714                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2715                             ImplicitParamDecl::Other);
2716       Args.push_back(&Dst);
2717 
2718       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2719           CGM.getContext().VoidPtrTy, Args);
2720       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2721       std::string Name = getName({"__kmpc_global_ctor_", ""});
2722       llvm::Function *Fn =
2723           CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2724       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2725                             Args, Loc, Loc);
2726       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
2727           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2728           CGM.getContext().VoidPtrTy, Dst.getLocation());
2729       Address Arg = Address(ArgVal, VDAddr.getAlignment());
2730       Arg = CtorCGF.Builder.CreateElementBitCast(
2731           Arg, CtorCGF.ConvertTypeForMem(ASTTy));
2732       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2733                                /*IsInitializer=*/true);
2734       ArgVal = CtorCGF.EmitLoadOfScalar(
2735           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2736           CGM.getContext().VoidPtrTy, Dst.getLocation());
2737       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2738       CtorCGF.FinishFunction();
2739       Ctor = Fn;
2740     }
2741     if (VD->getType().isDestructedType() != QualType::DK_none) {
2742       // Generate function that emits destructor call for the threadprivate copy
2743       // of the variable VD
2744       CodeGenFunction DtorCGF(CGM);
2745       FunctionArgList Args;
2746       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2747                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2748                             ImplicitParamDecl::Other);
2749       Args.push_back(&Dst);
2750 
2751       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2752           CGM.getContext().VoidTy, Args);
2753       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2754       std::string Name = getName({"__kmpc_global_dtor_", ""});
2755       llvm::Function *Fn =
2756           CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2757       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2758       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2759                             Loc, Loc);
2760       // Create a scope with an artificial location for the body of this function.
2761       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2762       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
2763           DtorCGF.GetAddrOfLocalVar(&Dst),
2764           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2765       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2766                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2767                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2768       DtorCGF.FinishFunction();
2769       Dtor = Fn;
2770     }
2771     // Do not emit init function if it is not required.
2772     if (!Ctor && !Dtor)
2773       return nullptr;
2774 
2775     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2776     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2777                                                /*isVarArg=*/false)
2778                            ->getPointerTo();
2779     // Copying constructor for the threadprivate variable.
2780     // Must be NULL - reserved by runtime, but currently it requires that this
2781     // parameter is always NULL. Otherwise it fires assertion.
2782     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2783     if (Ctor == nullptr) {
2784       auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2785                                              /*isVarArg=*/false)
2786                          ->getPointerTo();
2787       Ctor = llvm::Constant::getNullValue(CtorTy);
2788     }
2789     if (Dtor == nullptr) {
2790       auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2791                                              /*isVarArg=*/false)
2792                          ->getPointerTo();
2793       Dtor = llvm::Constant::getNullValue(DtorTy);
2794     }
2795     if (!CGF) {
2796       auto *InitFunctionTy =
2797           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2798       std::string Name = getName({"__omp_threadprivate_init_", ""});
2799       llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2800           InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
2801       CodeGenFunction InitCGF(CGM);
2802       FunctionArgList ArgList;
2803       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2804                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
2805                             Loc, Loc);
2806       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2807       InitCGF.FinishFunction();
2808       return InitFunction;
2809     }
2810     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2811   }
2812   return nullptr;
2813 }
2814 
2815 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2816                                                      llvm::GlobalVariable *Addr,
2817                                                      bool PerformInit) {
2818   Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2819       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2820   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
2821       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2822        HasRequiresUnifiedSharedMemory))
2823     return CGM.getLangOpts().OpenMPIsDevice;
2824   VD = VD->getDefinition(CGM.getContext());
2825   if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
2826     return CGM.getLangOpts().OpenMPIsDevice;
2827 
2828   QualType ASTTy = VD->getType();
2829 
2830   SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
2831   // Produce the unique prefix to identify the new target regions. We use
2832   // the source location of the variable declaration which we know to not
2833   // conflict with any target region.
2834   unsigned DeviceID;
2835   unsigned FileID;
2836   unsigned Line;
2837   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2838   SmallString<128> Buffer, Out;
2839   {
2840     llvm::raw_svector_ostream OS(Buffer);
2841     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2842        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2843   }
2844 
2845   const Expr *Init = VD->getAnyInitializer();
2846   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2847     llvm::Constant *Ctor;
2848     llvm::Constant *ID;
2849     if (CGM.getLangOpts().OpenMPIsDevice) {
2850       // Generate function that re-emits the declaration's initializer into
2851       // the threadprivate copy of the variable VD
2852       CodeGenFunction CtorCGF(CGM);
2853 
2854       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2855       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2856       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2857           FTy, Twine(Buffer, "_ctor"), FI, Loc);
2858       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2859       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2860                             FunctionArgList(), Loc, Loc);
2861       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2862       CtorCGF.EmitAnyExprToMem(Init,
2863                                Address(Addr, CGM.getContext().getDeclAlign(VD)),
2864                                Init->getType().getQualifiers(),
2865                                /*IsInitializer=*/true);
2866       CtorCGF.FinishFunction();
2867       Ctor = Fn;
2868       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2869       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
2870     } else {
2871       Ctor = new llvm::GlobalVariable(
2872           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2873           llvm::GlobalValue::PrivateLinkage,
2874           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2875       ID = Ctor;
2876     }
2877 
2878     // Register the information for the entry associated with the constructor.
2879     Out.clear();
2880     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2881         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2882         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
2883   }
2884   if (VD->getType().isDestructedType() != QualType::DK_none) {
2885     llvm::Constant *Dtor;
2886     llvm::Constant *ID;
2887     if (CGM.getLangOpts().OpenMPIsDevice) {
2888       // Generate function that emits destructor call for the threadprivate
2889       // copy of the variable VD
2890       CodeGenFunction DtorCGF(CGM);
2891 
2892       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2893       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2894       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2895           FTy, Twine(Buffer, "_dtor"), FI, Loc);
2896       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2897       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2898                             FunctionArgList(), Loc, Loc);
2899       // Create a scope with an artificial location for the body of this
2900       // function.
2901       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2902       DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
2903                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2904                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2905       DtorCGF.FinishFunction();
2906       Dtor = Fn;
2907       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2908       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
2909     } else {
2910       Dtor = new llvm::GlobalVariable(
2911           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2912           llvm::GlobalValue::PrivateLinkage,
2913           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
2914       ID = Dtor;
2915     }
2916     // Register the information for the entry associated with the destructor.
2917     Out.clear();
2918     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2919         DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
2920         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
2921   }
2922   return CGM.getLangOpts().OpenMPIsDevice;
2923 }
2924 
2925 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2926                                                           QualType VarType,
2927                                                           StringRef Name) {
2928   std::string Suffix = getName({"artificial", ""});
2929   std::string CacheSuffix = getName({"cache", ""});
2930   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2931   llvm::Value *GAddr =
2932       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2933   llvm::Value *Args[] = {
2934       emitUpdateLocation(CGF, SourceLocation()),
2935       getThreadID(CGF, SourceLocation()),
2936       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2937       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2938                                 /*isSigned=*/false),
2939       getOrCreateInternalVariable(
2940           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2941   return Address(
2942       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2943           CGF.EmitRuntimeCall(
2944               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2945           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2946       CGM.getPointerAlign());
2947 }
2948 
2949 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2950                                       const RegionCodeGenTy &ThenGen,
2951                                       const RegionCodeGenTy &ElseGen) {
2952   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2953 
2954   // If the condition constant folds and can be elided, try to avoid emitting
2955   // the condition and the dead arm of the if/else.
2956   bool CondConstant;
2957   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2958     if (CondConstant)
2959       ThenGen(CGF);
2960     else
2961       ElseGen(CGF);
2962     return;
2963   }
2964 
2965   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2966   // emit the conditional branch.
2967   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2968   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2969   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2970   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2971 
2972   // Emit the 'then' code.
2973   CGF.EmitBlock(ThenBlock);
2974   ThenGen(CGF);
2975   CGF.EmitBranch(ContBlock);
2976   // Emit the 'else' code if present.
2977   // There is no need to emit line number for unconditional branch.
2978   (void)ApplyDebugLocation::CreateEmpty(CGF);
2979   CGF.EmitBlock(ElseBlock);
2980   ElseGen(CGF);
2981   // There is no need to emit line number for unconditional branch.
2982   (void)ApplyDebugLocation::CreateEmpty(CGF);
2983   CGF.EmitBranch(ContBlock);
2984   // Emit the continuation block for code after the if.
2985   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2986 }
2987 
2988 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2989                                        llvm::Function *OutlinedFn,
2990                                        ArrayRef<llvm::Value *> CapturedVars,
2991                                        const Expr *IfCond) {
2992   if (!CGF.HaveInsertPoint())
2993     return;
2994   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2995   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
2996                                                      PrePostActionTy &) {
2997     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2998     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2999     llvm::Value *Args[] = {
3000         RTLoc,
3001         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
3002         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
3003     llvm::SmallVector<llvm::Value *, 16> RealArgs;
3004     RealArgs.append(std::begin(Args), std::end(Args));
3005     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
3006 
3007     llvm::FunctionCallee RTLFn =
3008         RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
3009     CGF.EmitRuntimeCall(RTLFn, RealArgs);
3010   };
3011   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
3012                                                           PrePostActionTy &) {
3013     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
3014     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
3015     // Build calls:
3016     // __kmpc_serialized_parallel(&Loc, GTid);
3017     llvm::Value *Args[] = {RTLoc, ThreadID};
3018     CGF.EmitRuntimeCall(
3019         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
3020 
3021     // OutlinedFn(&GTid, &zero, CapturedStruct);
3022     Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
3023                                                         /*Name*/ ".zero.addr");
3024     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
3025     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
3026     // ThreadId for serialized parallels is 0.
3027     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
3028     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
3029     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
3030     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
3031 
3032     // __kmpc_end_serialized_parallel(&Loc, GTid);
3033     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
3034     CGF.EmitRuntimeCall(
3035         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
3036         EndArgs);
3037   };
3038   if (IfCond) {
3039     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
3040   } else {
3041     RegionCodeGenTy ThenRCG(ThenGen);
3042     ThenRCG(CGF);
3043   }
3044 }
3045 
3046 // If we're inside an (outlined) parallel region, use the region info's
3047 // thread-ID variable (it is passed in a first argument of the outlined function
3048 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3049 // regular serial code region, get thread ID by calling kmp_int32
3050 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3051 // return the address of that temp.
3052 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3053                                              SourceLocation Loc) {
3054   if (auto *OMPRegionInfo =
3055           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3056     if (OMPRegionInfo->getThreadIDVariable())
3057       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
3058 
3059   llvm::Value *ThreadID = getThreadID(CGF, Loc);
3060   QualType Int32Ty =
3061       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3062   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3063   CGF.EmitStoreOfScalar(ThreadID,
3064                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3065 
3066   return ThreadIDTemp;
3067 }
3068 
3069 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3070     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3071   SmallString<256> Buffer;
3072   llvm::raw_svector_ostream Out(Buffer);
3073   Out << Name;
3074   StringRef RuntimeName = Out.str();
3075   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3076   if (Elem.second) {
3077     assert(Elem.second->getType()->getPointerElementType() == Ty &&
3078            "OMP internal variable has different type than requested");
3079     return &*Elem.second;
3080   }
3081 
3082   return Elem.second = new llvm::GlobalVariable(
3083              CGM.getModule(), Ty, /*IsConstant*/ false,
3084              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3085              Elem.first(), /*InsertBefore=*/nullptr,
3086              llvm::GlobalValue::NotThreadLocal, AddressSpace);
3087 }
3088 
3089 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3090   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3091   std::string Name = getName({Prefix, "var"});
3092   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3093 }
3094 
3095 namespace {
3096 /// Common pre(post)-action for different OpenMP constructs.
3097 class CommonActionTy final : public PrePostActionTy {
3098   llvm::FunctionCallee EnterCallee;
3099   ArrayRef<llvm::Value *> EnterArgs;
3100   llvm::FunctionCallee ExitCallee;
3101   ArrayRef<llvm::Value *> ExitArgs;
3102   bool Conditional;
3103   llvm::BasicBlock *ContBlock = nullptr;
3104 
3105 public:
3106   CommonActionTy(llvm::FunctionCallee EnterCallee,
3107                  ArrayRef<llvm::Value *> EnterArgs,
3108                  llvm::FunctionCallee ExitCallee,
3109                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3110       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3111         ExitArgs(ExitArgs), Conditional(Conditional) {}
3112   void Enter(CodeGenFunction &CGF) override {
3113     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3114     if (Conditional) {
3115       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3116       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3117       ContBlock = CGF.createBasicBlock("omp_if.end");
3118       // Generate the branch (If-stmt)
3119       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3120       CGF.EmitBlock(ThenBlock);
3121     }
3122   }
3123   void Done(CodeGenFunction &CGF) {
3124     // Emit the rest of blocks/branches
3125     CGF.EmitBranch(ContBlock);
3126     CGF.EmitBlock(ContBlock, true);
3127   }
3128   void Exit(CodeGenFunction &CGF) override {
3129     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3130   }
3131 };
3132 } // anonymous namespace
3133 
3134 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3135                                          StringRef CriticalName,
3136                                          const RegionCodeGenTy &CriticalOpGen,
3137                                          SourceLocation Loc, const Expr *Hint) {
3138   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3139   // CriticalOpGen();
3140   // __kmpc_end_critical(ident_t *, gtid, Lock);
3141   // Prepare arguments and build a call to __kmpc_critical
3142   if (!CGF.HaveInsertPoint())
3143     return;
3144   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3145                          getCriticalRegionLock(CriticalName)};
3146   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3147                                                 std::end(Args));
3148   if (Hint) {
3149     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3150         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3151   }
3152   CommonActionTy Action(
3153       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3154                                  : OMPRTL__kmpc_critical),
3155       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3156   CriticalOpGen.setAction(Action);
3157   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3158 }
3159 
3160 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3161                                        const RegionCodeGenTy &MasterOpGen,
3162                                        SourceLocation Loc) {
3163   if (!CGF.HaveInsertPoint())
3164     return;
3165   // if(__kmpc_master(ident_t *, gtid)) {
3166   //   MasterOpGen();
3167   //   __kmpc_end_master(ident_t *, gtid);
3168   // }
3169   // Prepare arguments and build a call to __kmpc_master
3170   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3171   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3172                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3173                         /*Conditional=*/true);
3174   MasterOpGen.setAction(Action);
3175   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3176   Action.Done(CGF);
3177 }
3178 
3179 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3180                                         SourceLocation Loc) {
3181   if (!CGF.HaveInsertPoint())
3182     return;
3183   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3184   llvm::Value *Args[] = {
3185       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3186       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3187   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3188   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3189     Region->emitUntiedSwitch(CGF);
3190 }
3191 
3192 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3193                                           const RegionCodeGenTy &TaskgroupOpGen,
3194                                           SourceLocation Loc) {
3195   if (!CGF.HaveInsertPoint())
3196     return;
3197   // __kmpc_taskgroup(ident_t *, gtid);
3198   // TaskgroupOpGen();
3199   // __kmpc_end_taskgroup(ident_t *, gtid);
3200   // Prepare arguments and build a call to __kmpc_taskgroup
3201   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3202   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3203                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3204                         Args);
3205   TaskgroupOpGen.setAction(Action);
3206   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3207 }
3208 
3209 /// Given an array of pointers to variables, project the address of a
3210 /// given variable.
3211 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3212                                       unsigned Index, const VarDecl *Var) {
3213   // Pull out the pointer to the variable.
3214   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3215   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3216 
3217   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3218   Addr = CGF.Builder.CreateElementBitCast(
3219       Addr, CGF.ConvertTypeForMem(Var->getType()));
3220   return Addr;
3221 }
3222 
3223 static llvm::Value *emitCopyprivateCopyFunction(
3224     CodeGenModule &CGM, llvm::Type *ArgsType,
3225     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3226     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3227     SourceLocation Loc) {
3228   ASTContext &C = CGM.getContext();
3229   // void copy_func(void *LHSArg, void *RHSArg);
3230   FunctionArgList Args;
3231   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3232                            ImplicitParamDecl::Other);
3233   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3234                            ImplicitParamDecl::Other);
3235   Args.push_back(&LHSArg);
3236   Args.push_back(&RHSArg);
3237   const auto &CGFI =
3238       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3239   std::string Name =
3240       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3241   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3242                                     llvm::GlobalValue::InternalLinkage, Name,
3243                                     &CGM.getModule());
3244   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3245   Fn->setDoesNotRecurse();
3246   CodeGenFunction CGF(CGM);
3247   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3248   // Dest = (void*[n])(LHSArg);
3249   // Src = (void*[n])(RHSArg);
3250   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3251       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3252       ArgsType), CGF.getPointerAlign());
3253   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3254       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3255       ArgsType), CGF.getPointerAlign());
3256   // *(Type0*)Dst[0] = *(Type0*)Src[0];
3257   // *(Type1*)Dst[1] = *(Type1*)Src[1];
3258   // ...
3259   // *(Typen*)Dst[n] = *(Typen*)Src[n];
3260   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3261     const auto *DestVar =
3262         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3263     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3264 
3265     const auto *SrcVar =
3266         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3267     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3268 
3269     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3270     QualType Type = VD->getType();
3271     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3272   }
3273   CGF.FinishFunction();
3274   return Fn;
3275 }
3276 
/// Emits the code for an OpenMP 'single' region and, when \p CopyprivateVars
/// is non-empty, the '__kmpc_copyprivate' call that follows it.
///
/// \param CGF             Function being emitted; nothing is generated when it
///                        has no insertion point.
/// \param SingleOpGen     Generator for the body of the 'single' region.
/// \param Loc             Source location used for the ident_t argument and the
///                        thread id lookup.
/// \param CopyprivateVars Variables listed in the 'copyprivate' clause.
/// \param SrcExprs        Forwarded to emitCopyprivateCopyFunction.
/// \param DstExprs        Forwarded to emitCopyprivateCopyFunction.
/// \param AssignmentOps   Forwarded to emitCopyprivateCopyFunction.
///
/// The four expression lists must all have the same length (asserted).
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // Shape of the generated code:
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // The did_it flag is only materialized when there is something to
  // copyprivate; an invalid address doubles as the "no copyprivate" marker
  // for the rest of this function.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // The same argument list is handed to both the enter (__kmpc_single) and
  // the exit (__kmpc_end_single) runtime calls of the action.
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // NOTE(review): presumably this closes the conditional block opened by the
  // action, so the did_it store above stays inside it - confirm against
  // CommonActionTy::Done.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    // Store the address of every copyprivate variable, cast to void *, into
    // the matching slot of the list.
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    // The runtime entry takes the list as an untyped pointer.
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}
3357 
3358 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3359                                         const RegionCodeGenTy &OrderedOpGen,
3360                                         SourceLocation Loc, bool IsThreads) {
3361   if (!CGF.HaveInsertPoint())
3362     return;
3363   // __kmpc_ordered(ident_t *, gtid);
3364   // OrderedOpGen();
3365   // __kmpc_end_ordered(ident_t *, gtid);
3366   // Prepare arguments and build a call to __kmpc_ordered
3367   if (IsThreads) {
3368     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3369     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3370                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3371                           Args);
3372     OrderedOpGen.setAction(Action);
3373     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3374     return;
3375   }
3376   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3377 }
3378 
3379 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3380   unsigned Flags;
3381   if (Kind == OMPD_for)
3382     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3383   else if (Kind == OMPD_sections)
3384     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3385   else if (Kind == OMPD_single)
3386     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3387   else if (Kind == OMPD_barrier)
3388     Flags = OMP_IDENT_BARRIER_EXPL;
3389   else
3390     Flags = OMP_IDENT_BARRIER_IMPL;
3391   return Flags;
3392 }
3393 
3394 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3395     CodeGenFunction &CGF, const OMPLoopDirective &S,
3396     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3397   // Check if the loop directive is actually a doacross loop directive. In this
3398   // case choose static, 1 schedule.
3399   if (llvm::any_of(
3400           S.getClausesOfKind<OMPOrderedClause>(),
3401           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3402     ScheduleKind = OMPC_SCHEDULE_static;
3403     // Chunk size is 1 in this case.
3404     llvm::APInt ChunkSize(32, 1);
3405     ChunkExpr = IntegerLiteral::Create(
3406         CGF.getContext(), ChunkSize,
3407         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3408         SourceLocation());
3409   }
3410 }
3411 
/// Emits a barrier runtime call for directive \p Kind.
///
/// Inside an OpenMP region that has a cancel construct (and unless
/// \p ForceSimpleCall is set) __kmpc_cancel_barrier is emitted instead of
/// __kmpc_barrier. When \p EmitChecks is also set, the result of the
/// cancellation barrier is tested and a non-zero result branches to the
/// cancel destination of the enclosing region.
///
/// \param CGF             Function being emitted; nothing is generated when it
///                        has no insertion point.
/// \param Loc             Source location for the ident_t and thread id.
/// \param Kind            Directive used to select the ident_t barrier flags.
/// \param EmitChecks      Emit the cancellation check after the barrier.
/// \param ForceSimpleCall Always emit the plain __kmpc_barrier call.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // The barrier kind is encoded in the flags of the ident_t argument.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id); both take the same argument list.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      // The cancellation barrier replaces the plain barrier below.
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}
3449 
3450 /// Map the OpenMP loop schedule to the runtime enumeration.
3451 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3452                                           bool Chunked, bool Ordered) {
3453   switch (ScheduleKind) {
3454   case OMPC_SCHEDULE_static:
3455     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3456                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3457   case OMPC_SCHEDULE_dynamic:
3458     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3459   case OMPC_SCHEDULE_guided:
3460     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3461   case OMPC_SCHEDULE_runtime:
3462     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3463   case OMPC_SCHEDULE_auto:
3464     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3465   case OMPC_SCHEDULE_unknown:
3466     assert(!Chunked && "chunk was specified but schedule kind not known");
3467     return Ordered ? OMP_ord_static : OMP_sch_static;
3468   }
3469   llvm_unreachable("Unexpected runtime schedule");
3470 }
3471 
3472 /// Map the OpenMP distribute schedule to the runtime enumeration.
3473 static OpenMPSchedType
3474 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3475   // only static is allowed for dist_schedule
3476   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3477 }
3478 
3479 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3480                                          bool Chunked) const {
3481   OpenMPSchedType Schedule =
3482       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3483   return Schedule == OMP_sch_static;
3484 }
3485 
3486 bool CGOpenMPRuntime::isStaticNonchunked(
3487     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3488   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3489   return Schedule == OMP_dist_sch_static;
3490 }
3491 
3492 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3493                                       bool Chunked) const {
3494   OpenMPSchedType Schedule =
3495       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3496   return Schedule == OMP_sch_static_chunked;
3497 }
3498 
3499 bool CGOpenMPRuntime::isStaticChunked(
3500     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3501   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3502   return Schedule == OMP_dist_sch_static_chunked;
3503 }
3504 
3505 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3506   OpenMPSchedType Schedule =
3507       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3508   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3509   return Schedule != OMP_sch_static;
3510 }
3511 
3512 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
3513                                   OpenMPScheduleClauseModifier M1,
3514                                   OpenMPScheduleClauseModifier M2) {
3515   int Modifier = 0;
3516   switch (M1) {
3517   case OMPC_SCHEDULE_MODIFIER_monotonic:
3518     Modifier = OMP_sch_modifier_monotonic;
3519     break;
3520   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3521     Modifier = OMP_sch_modifier_nonmonotonic;
3522     break;
3523   case OMPC_SCHEDULE_MODIFIER_simd:
3524     if (Schedule == OMP_sch_static_chunked)
3525       Schedule = OMP_sch_static_balanced_chunked;
3526     break;
3527   case OMPC_SCHEDULE_MODIFIER_last:
3528   case OMPC_SCHEDULE_MODIFIER_unknown:
3529     break;
3530   }
3531   switch (M2) {
3532   case OMPC_SCHEDULE_MODIFIER_monotonic:
3533     Modifier = OMP_sch_modifier_monotonic;
3534     break;
3535   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3536     Modifier = OMP_sch_modifier_nonmonotonic;
3537     break;
3538   case OMPC_SCHEDULE_MODIFIER_simd:
3539     if (Schedule == OMP_sch_static_chunked)
3540       Schedule = OMP_sch_static_balanced_chunked;
3541     break;
3542   case OMPC_SCHEDULE_MODIFIER_last:
3543   case OMPC_SCHEDULE_MODIFIER_unknown:
3544     break;
3545   }
3546   return Schedule | Modifier;
3547 }
3548 
3549 void CGOpenMPRuntime::emitForDispatchInit(
3550     CodeGenFunction &CGF, SourceLocation Loc,
3551     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3552     bool Ordered, const DispatchRTInput &DispatchValues) {
3553   if (!CGF.HaveInsertPoint())
3554     return;
3555   OpenMPSchedType Schedule = getRuntimeSchedule(
3556       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3557   assert(Ordered ||
3558          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3559           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3560           Schedule != OMP_sch_static_balanced_chunked));
3561   // Call __kmpc_dispatch_init(
3562   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3563   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
3564   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
3565 
3566   // If the Chunk was not specified in the clause - use default value 1.
3567   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3568                                             : CGF.Builder.getIntN(IVSize, 1);
3569   llvm::Value *Args[] = {
3570       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3571       CGF.Builder.getInt32(addMonoNonMonoModifier(
3572           Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3573       DispatchValues.LB,                                // Lower
3574       DispatchValues.UB,                                // Upper
3575       CGF.Builder.getIntN(IVSize, 1),                   // Stride
3576       Chunk                                             // Chunk
3577   };
3578   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3579 }
3580 
3581 static void emitForStaticInitCall(
3582     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3583     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
3584     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3585     const CGOpenMPRuntime::StaticRTInput &Values) {
3586   if (!CGF.HaveInsertPoint())
3587     return;
3588 
3589   assert(!Values.Ordered);
3590   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3591          Schedule == OMP_sch_static_balanced_chunked ||
3592          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3593          Schedule == OMP_dist_sch_static ||
3594          Schedule == OMP_dist_sch_static_chunked);
3595 
3596   // Call __kmpc_for_static_init(
3597   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3598   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3599   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3600   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
3601   llvm::Value *Chunk = Values.Chunk;
3602   if (Chunk == nullptr) {
3603     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3604             Schedule == OMP_dist_sch_static) &&
3605            "expected static non-chunked schedule");
3606     // If the Chunk was not specified in the clause - use default value 1.
3607     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3608   } else {
3609     assert((Schedule == OMP_sch_static_chunked ||
3610             Schedule == OMP_sch_static_balanced_chunked ||
3611             Schedule == OMP_ord_static_chunked ||
3612             Schedule == OMP_dist_sch_static_chunked) &&
3613            "expected static chunked schedule");
3614   }
3615   llvm::Value *Args[] = {
3616       UpdateLocation,
3617       ThreadId,
3618       CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
3619                                                   M2)), // Schedule type
3620       Values.IL.getPointer(),                           // &isLastIter
3621       Values.LB.getPointer(),                           // &LB
3622       Values.UB.getPointer(),                           // &UB
3623       Values.ST.getPointer(),                           // &Stride
3624       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
3625       Chunk                                             // Chunk
3626   };
3627   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3628 }
3629 
3630 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3631                                         SourceLocation Loc,
3632                                         OpenMPDirectiveKind DKind,
3633                                         const OpenMPScheduleTy &ScheduleKind,
3634                                         const StaticRTInput &Values) {
3635   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3636       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3637   assert(isOpenMPWorksharingDirective(DKind) &&
3638          "Expected loop-based or sections-based directive.");
3639   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3640                                              isOpenMPLoopDirective(DKind)
3641                                                  ? OMP_IDENT_WORK_LOOP
3642                                                  : OMP_IDENT_WORK_SECTIONS);
3643   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3644   llvm::FunctionCallee StaticInitFunction =
3645       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3646   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3647                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3648 }
3649 
3650 void CGOpenMPRuntime::emitDistributeStaticInit(
3651     CodeGenFunction &CGF, SourceLocation Loc,
3652     OpenMPDistScheduleClauseKind SchedKind,
3653     const CGOpenMPRuntime::StaticRTInput &Values) {
3654   OpenMPSchedType ScheduleNum =
3655       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3656   llvm::Value *UpdatedLocation =
3657       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3658   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3659   llvm::FunctionCallee StaticInitFunction =
3660       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3661   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3662                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3663                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3664 }
3665 
3666 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3667                                           SourceLocation Loc,
3668                                           OpenMPDirectiveKind DKind) {
3669   if (!CGF.HaveInsertPoint())
3670     return;
3671   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3672   llvm::Value *Args[] = {
3673       emitUpdateLocation(CGF, Loc,
3674                          isOpenMPDistributeDirective(DKind)
3675                              ? OMP_IDENT_WORK_DISTRIBUTE
3676                              : isOpenMPLoopDirective(DKind)
3677                                    ? OMP_IDENT_WORK_LOOP
3678                                    : OMP_IDENT_WORK_SECTIONS),
3679       getThreadID(CGF, Loc)};
3680   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3681                       Args);
3682 }
3683 
3684 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3685                                                  SourceLocation Loc,
3686                                                  unsigned IVSize,
3687                                                  bool IVSigned) {
3688   if (!CGF.HaveInsertPoint())
3689     return;
3690   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3691   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3692   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3693 }
3694 
3695 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3696                                           SourceLocation Loc, unsigned IVSize,
3697                                           bool IVSigned, Address IL,
3698                                           Address LB, Address UB,
3699                                           Address ST) {
3700   // Call __kmpc_dispatch_next(
3701   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3702   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3703   //          kmp_int[32|64] *p_stride);
3704   llvm::Value *Args[] = {
3705       emitUpdateLocation(CGF, Loc),
3706       getThreadID(CGF, Loc),
3707       IL.getPointer(), // &isLastIter
3708       LB.getPointer(), // &Lower
3709       UB.getPointer(), // &Upper
3710       ST.getPointer()  // &Stride
3711   };
3712   llvm::Value *Call =
3713       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3714   return CGF.EmitScalarConversion(
3715       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3716       CGF.getContext().BoolTy, Loc);
3717 }
3718 
3719 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3720                                            llvm::Value *NumThreads,
3721                                            SourceLocation Loc) {
3722   if (!CGF.HaveInsertPoint())
3723     return;
3724   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3725   llvm::Value *Args[] = {
3726       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3727       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3728   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3729                       Args);
3730 }
3731 
3732 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3733                                          OpenMPProcBindClauseKind ProcBind,
3734                                          SourceLocation Loc) {
3735   if (!CGF.HaveInsertPoint())
3736     return;
3737   // Constants for proc bind value accepted by the runtime.
3738   enum ProcBindTy {
3739     ProcBindFalse = 0,
3740     ProcBindTrue,
3741     ProcBindMaster,
3742     ProcBindClose,
3743     ProcBindSpread,
3744     ProcBindIntel,
3745     ProcBindDefault
3746   } RuntimeProcBind;
3747   switch (ProcBind) {
3748   case OMPC_PROC_BIND_master:
3749     RuntimeProcBind = ProcBindMaster;
3750     break;
3751   case OMPC_PROC_BIND_close:
3752     RuntimeProcBind = ProcBindClose;
3753     break;
3754   case OMPC_PROC_BIND_spread:
3755     RuntimeProcBind = ProcBindSpread;
3756     break;
3757   case OMPC_PROC_BIND_unknown:
3758     llvm_unreachable("Unsupported proc_bind value.");
3759   }
3760   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3761   llvm::Value *Args[] = {
3762       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3763       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3764   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3765 }
3766 
/// Emits a call to __kmpc_flush for an OpenMP 'flush' directive.
///
/// The list of flushed expressions (the unnamed ArrayRef parameter) is not
/// used: the runtime call only receives the source location.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc) {
  // Nothing to emit when there is no insertion point.
  if (!CGF.HaveInsertPoint())
    return;
  // Build call void __kmpc_flush(ident_t *loc)
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                      emitUpdateLocation(CGF, Loc));
}
3775 
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// The enumerators carry no explicit values, so they number the fields
/// consecutively from 0 in declaration order; reordering them changes every
/// index that follows.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3801 
3802 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3803   return OffloadEntriesTargetRegion.empty() &&
3804          OffloadEntriesDeviceGlobalVar.empty();
3805 }
3806 
3807 /// Initialize target region entry.
3808 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3809     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3810                                     StringRef ParentName, unsigned LineNum,
3811                                     unsigned Order) {
3812   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3813                                              "only required for the device "
3814                                              "code generation.");
3815   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3816       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3817                                    OMPTargetRegionEntryTargetRegion);
3818   ++OffloadingEntriesNum;
3819 }
3820 
3821 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3822     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3823                                   StringRef ParentName, unsigned LineNum,
3824                                   llvm::Constant *Addr, llvm::Constant *ID,
3825                                   OMPTargetRegionEntryKind Flags) {
3826   // If we are emitting code for a target, the entry is already initialized,
3827   // only has to be registered.
3828   if (CGM.getLangOpts().OpenMPIsDevice) {
3829     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3830       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3831           DiagnosticsEngine::Error,
3832           "Unable to find target region on line '%0' in the device code.");
3833       CGM.getDiags().Report(DiagID) << LineNum;
3834       return;
3835     }
3836     auto &Entry =
3837         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3838     assert(Entry.isValid() && "Entry not initialized!");
3839     Entry.setAddress(Addr);
3840     Entry.setID(ID);
3841     Entry.setFlags(Flags);
3842   } else {
3843     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3844     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3845     ++OffloadingEntriesNum;
3846   }
3847 }
3848 
3849 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3850     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3851     unsigned LineNum) const {
3852   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3853   if (PerDevice == OffloadEntriesTargetRegion.end())
3854     return false;
3855   auto PerFile = PerDevice->second.find(FileID);
3856   if (PerFile == PerDevice->second.end())
3857     return false;
3858   auto PerParentName = PerFile->second.find(ParentName);
3859   if (PerParentName == PerFile->second.end())
3860     return false;
3861   auto PerLine = PerParentName->second.find(LineNum);
3862   if (PerLine == PerParentName->second.end())
3863     return false;
3864   // Fail if this entry is already registered.
3865   if (PerLine->second.getAddress() || PerLine->second.getID())
3866     return false;
3867   return true;
3868 }
3869 
3870 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3871     const OffloadTargetRegionEntryInfoActTy &Action) {
3872   // Scan all target region entries and perform the provided action.
3873   for (const auto &D : OffloadEntriesTargetRegion)
3874     for (const auto &F : D.second)
3875       for (const auto &P : F.second)
3876         for (const auto &L : P.second)
3877           Action(D.first, F.first, P.first(), L.first, L.second);
3878 }
3879 
3880 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3881     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3882                                        OMPTargetGlobalVarEntryKind Flags,
3883                                        unsigned Order) {
3884   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3885                                              "only required for the device "
3886                                              "code generation.");
3887   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3888   ++OffloadingEntriesNum;
3889 }
3890 
3891 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3892     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3893                                      CharUnits VarSize,
3894                                      OMPTargetGlobalVarEntryKind Flags,
3895                                      llvm::GlobalValue::LinkageTypes Linkage) {
3896   if (CGM.getLangOpts().OpenMPIsDevice) {
3897     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3898     assert(Entry.isValid() && Entry.getFlags() == Flags &&
3899            "Entry not initialized!");
3900     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3901            "Resetting with the new address.");
3902     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3903       if (Entry.getVarSize().isZero()) {
3904         Entry.setVarSize(VarSize);
3905         Entry.setLinkage(Linkage);
3906       }
3907       return;
3908     }
3909     Entry.setVarSize(VarSize);
3910     Entry.setLinkage(Linkage);
3911     Entry.setAddress(Addr);
3912   } else {
3913     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3914       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3915       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3916              "Entry not initialized!");
3917       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3918              "Resetting with the new address.");
3919       if (Entry.getVarSize().isZero()) {
3920         Entry.setVarSize(VarSize);
3921         Entry.setLinkage(Linkage);
3922       }
3923       return;
3924     }
3925     OffloadEntriesDeviceGlobalVar.try_emplace(
3926         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3927     ++OffloadingEntriesNum;
3928   }
3929 }
3930 
3931 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3932     actOnDeviceGlobalVarEntriesInfo(
3933         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3934   // Scan all target region entries and perform the provided action.
3935   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3936     Action(E.getKey(), E.getValue());
3937 }
3938 
3939 llvm::Function *
3940 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
3941   // If we don't have entries or if we are emitting code for the device, we
3942   // don't need to do anything.
3943   if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
3944     return nullptr;
3945 
3946   llvm::Module &M = CGM.getModule();
3947   ASTContext &C = CGM.getContext();
3948 
3949   // Get list of devices we care about
3950   const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;
3951 
3952   // We should be creating an offloading descriptor only if there are devices
3953   // specified.
3954   assert(!Devices.empty() && "No OpenMP offloading devices??");
3955 
3956   // Create the external variables that will point to the begin and end of the
3957   // host entries section. These will be defined by the linker.
3958   llvm::Type *OffloadEntryTy =
3959       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
3960   std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
3961   auto *HostEntriesBegin = new llvm::GlobalVariable(
3962       M, OffloadEntryTy, /*isConstant=*/true,
3963       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3964       EntriesBeginName);
3965   std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
3966   auto *HostEntriesEnd =
3967       new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
3968                                llvm::GlobalValue::ExternalLinkage,
3969                                /*Initializer=*/nullptr, EntriesEndName);
3970 
3971   // Create all device images
3972   auto *DeviceImageTy = cast<llvm::StructType>(
3973       CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
3974   ConstantInitBuilder DeviceImagesBuilder(CGM);
3975   ConstantArrayBuilder DeviceImagesEntries =
3976       DeviceImagesBuilder.beginArray(DeviceImageTy);
3977 
3978   for (const llvm::Triple &Device : Devices) {
3979     StringRef T = Device.getTriple();
3980     std::string BeginName = getName({"omp_offloading", "img_start", ""});
3981     auto *ImgBegin = new llvm::GlobalVariable(
3982         M, CGM.Int8Ty, /*isConstant=*/true,
3983         llvm::GlobalValue::ExternalWeakLinkage,
3984         /*Initializer=*/nullptr, Twine(BeginName).concat(T));
3985     std::string EndName = getName({"omp_offloading", "img_end", ""});
3986     auto *ImgEnd = new llvm::GlobalVariable(
3987         M, CGM.Int8Ty, /*isConstant=*/true,
3988         llvm::GlobalValue::ExternalWeakLinkage,
3989         /*Initializer=*/nullptr, Twine(EndName).concat(T));
3990 
3991     llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
3992                               HostEntriesEnd};
3993     createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
3994                                              DeviceImagesEntries);
3995   }
3996 
3997   // Create device images global array.
3998   std::string ImagesName = getName({"omp_offloading", "device_images"});
3999   llvm::GlobalVariable *DeviceImages =
4000       DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
4001                                                 CGM.getPointerAlign(),
4002                                                 /*isConstant=*/true);
4003   DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4004 
4005   // This is a Zero array to be used in the creation of the constant expressions
4006   llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
4007                              llvm::Constant::getNullValue(CGM.Int32Ty)};
4008 
4009   // Create the target region descriptor.
4010   llvm::Constant *Data[] = {
4011       llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
4012       llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
4013                                            DeviceImages, Index),
4014       HostEntriesBegin, HostEntriesEnd};
4015   std::string Descriptor = getName({"omp_offloading", "descriptor"});
4016   llvm::GlobalVariable *Desc = createGlobalStruct(
4017       CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);
4018 
4019   // Emit code to register or unregister the descriptor at execution
4020   // startup or closing, respectively.
4021 
4022   llvm::Function *UnRegFn;
4023   {
4024     FunctionArgList Args;
4025     ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
4026     Args.push_back(&DummyPtr);
4027 
4028     CodeGenFunction CGF(CGM);
4029     // Disable debug info for global (de-)initializer because they are not part
4030     // of some particular construct.
4031     CGF.disableDebugInfo();
4032     const auto &FI =
4033         CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4034     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
4035     std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
4036     UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
4037     CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
4038     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
4039                         Desc);
4040     CGF.FinishFunction();
4041   }
4042   llvm::Function *RegFn;
4043   {
4044     CodeGenFunction CGF(CGM);
4045     // Disable debug info for global (de-)initializer because they are not part
4046     // of some particular construct.
4047     CGF.disableDebugInfo();
4048     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
4049     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
4050 
4051     // Encode offload target triples into the registration function name. It
4052     // will serve as a comdat key for the registration/unregistration code for
4053     // this particular combination of offloading targets.
4054     SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
4055     RegFnNameParts[0] = "omp_offloading";
4056     RegFnNameParts[1] = "descriptor_reg";
4057     llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
4058                     [](const llvm::Triple &T) -> const std::string& {
4059                       return T.getTriple();
4060                     });
4061     llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
4062     std::string Descriptor = getName(RegFnNameParts);
4063     RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
4064     CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
4065     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
4066     // Create a variable to drive the registration and unregistration of the
4067     // descriptor, so we can reuse the logic that emits Ctors and Dtors.
4068     ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
4069                                   SourceLocation(), nullptr, C.CharTy,
4070                                   ImplicitParamDecl::Other);
4071     CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
4072     CGF.FinishFunction();
4073   }
4074   if (CGM.supportsCOMDAT()) {
4075     // It is sufficient to call registration function only once, so create a
4076     // COMDAT group for registration/unregistration functions and associated
4077     // data. That would reduce startup time and code size. Registration
4078     // function serves as a COMDAT group key.
4079     llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
4080     RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
4081     RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
4082     RegFn->setComdat(ComdatKey);
4083     UnRegFn->setComdat(ComdatKey);
4084     DeviceImages->setComdat(ComdatKey);
4085     Desc->setComdat(ComdatKey);
4086   }
4087   return RegFn;
4088 }
4089 
4090 void CGOpenMPRuntime::createOffloadEntry(
4091     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4092     llvm::GlobalValue::LinkageTypes Linkage) {
4093   StringRef Name = Addr->getName();
4094   llvm::Module &M = CGM.getModule();
4095   llvm::LLVMContext &C = M.getContext();
4096 
4097   // Create constant string with the name.
4098   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4099 
4100   std::string StringName = getName({"omp_offloading", "entry_name"});
4101   auto *Str = new llvm::GlobalVariable(
4102       M, StrPtrInit->getType(), /*isConstant=*/true,
4103       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4104   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4105 
4106   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4107                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4108                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4109                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4110                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4111   std::string EntryName = getName({"omp_offloading", "entry", ""});
4112   llvm::GlobalVariable *Entry = createGlobalStruct(
4113       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4114       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4115 
4116   // The entry has to be created in the section the linker expects it to be.
4117   std::string Section = getName({"omp_offloading", "entries"});
4118   Entry->setSection(Section);
4119 }
4120 
4121 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
4122   // Emit the offloading entries and metadata so that the device codegen side
4123   // can easily figure out what to emit. The produced metadata looks like
4124   // this:
4125   //
4126   // !omp_offload.info = !{!1, ...}
4127   //
4128   // Right now we only generate metadata for function that contain target
4129   // regions.
4130 
4131   // If we do not have entries, we don't need to do anything.
4132   if (OffloadEntriesInfoManager.empty())
4133     return;
4134 
4135   llvm::Module &M = CGM.getModule();
4136   llvm::LLVMContext &C = M.getContext();
4137   SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
4138       OrderedEntries(OffloadEntriesInfoManager.size());
4139   llvm::SmallVector<StringRef, 16> ParentFunctions(
4140       OffloadEntriesInfoManager.size());
4141 
4142   // Auxiliary methods to create metadata values and strings.
4143   auto &&GetMDInt = [this](unsigned V) {
4144     return llvm::ConstantAsMetadata::get(
4145         llvm::ConstantInt::get(CGM.Int32Ty, V));
4146   };
4147 
4148   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
4149 
4150   // Create the offloading info metadata node.
4151   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
4152 
4153   // Create function that emits metadata for each target region entry;
4154   auto &&TargetRegionMetadataEmitter =
4155       [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
4156           unsigned DeviceID, unsigned FileID, StringRef ParentName,
4157           unsigned Line,
4158           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
4159         // Generate metadata for target regions. Each entry of this metadata
4160         // contains:
4161         // - Entry 0 -> Kind of this type of metadata (0).
4162         // - Entry 1 -> Device ID of the file where the entry was identified.
4163         // - Entry 2 -> File ID of the file where the entry was identified.
4164         // - Entry 3 -> Mangled name of the function where the entry was
4165         // identified.
4166         // - Entry 4 -> Line in the file where the entry was identified.
4167         // - Entry 5 -> Order the entry was created.
4168         // The first element of the metadata node is the kind.
4169         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
4170                                  GetMDInt(FileID),      GetMDString(ParentName),
4171                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
4172 
4173         // Save this entry in the right position of the ordered entries array.
4174         OrderedEntries[E.getOrder()] = &E;
4175         ParentFunctions[E.getOrder()] = ParentName;
4176 
4177         // Add metadata to the named metadata node.
4178         MD->addOperand(llvm::MDNode::get(C, Ops));
4179       };
4180 
4181   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
4182       TargetRegionMetadataEmitter);
4183 
4184   // Create function that emits metadata for each device global variable entry;
4185   auto &&DeviceGlobalVarMetadataEmitter =
4186       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
4187        MD](StringRef MangledName,
4188            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
4189                &E) {
4190         // Generate metadata for global variables. Each entry of this metadata
4191         // contains:
4192         // - Entry 0 -> Kind of this type of metadata (1).
4193         // - Entry 1 -> Mangled name of the variable.
4194         // - Entry 2 -> Declare target kind.
4195         // - Entry 3 -> Order the entry was created.
4196         // The first element of the metadata node is the kind.
4197         llvm::Metadata *Ops[] = {
4198             GetMDInt(E.getKind()), GetMDString(MangledName),
4199             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
4200 
4201         // Save this entry in the right position of the ordered entries array.
4202         OrderedEntries[E.getOrder()] = &E;
4203 
4204         // Add metadata to the named metadata node.
4205         MD->addOperand(llvm::MDNode::get(C, Ops));
4206       };
4207 
4208   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
4209       DeviceGlobalVarMetadataEmitter);
4210 
4211   for (const auto *E : OrderedEntries) {
4212     assert(E && "All ordered entries must exist!");
4213     if (const auto *CE =
4214             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
4215                 E)) {
4216       if (!CE->getID() || !CE->getAddress()) {
4217         // Do not blame the entry if the parent funtion is not emitted.
4218         StringRef FnName = ParentFunctions[CE->getOrder()];
4219         if (!CGM.GetGlobalValue(FnName))
4220           continue;
4221         unsigned DiagID = CGM.getDiags().getCustomDiagID(
4222             DiagnosticsEngine::Error,
4223             "Offloading entry for target region is incorrect: either the "
4224             "address or the ID is invalid.");
4225         CGM.getDiags().Report(DiagID);
4226         continue;
4227       }
4228       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
4229                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
4230     } else if (const auto *CE =
4231                    dyn_cast<OffloadEntriesInfoManagerTy::
4232                                 OffloadEntryInfoDeviceGlobalVar>(E)) {
4233       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
4234           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4235               CE->getFlags());
4236       switch (Flags) {
4237       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
4238         if (CGM.getLangOpts().OpenMPIsDevice &&
4239             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
4240           continue;
4241         if (!CE->getAddress()) {
4242           unsigned DiagID = CGM.getDiags().getCustomDiagID(
4243               DiagnosticsEngine::Error,
4244               "Offloading entry for declare target variable is incorrect: the "
4245               "address is invalid.");
4246           CGM.getDiags().Report(DiagID);
4247           continue;
4248         }
4249         // The vaiable has no definition - no need to add the entry.
4250         if (CE->getVarSize().isZero())
4251           continue;
4252         break;
4253       }
4254       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
4255         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
4256                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
4257                "Declaret target link address is set.");
4258         if (CGM.getLangOpts().OpenMPIsDevice)
4259           continue;
4260         if (!CE->getAddress()) {
4261           unsigned DiagID = CGM.getDiags().getCustomDiagID(
4262               DiagnosticsEngine::Error,
4263               "Offloading entry for declare target variable is incorrect: the "
4264               "address is invalid.");
4265           CGM.getDiags().Report(DiagID);
4266           continue;
4267         }
4268         break;
4269       }
4270       createOffloadEntry(CE->getAddress(), CE->getAddress(),
4271                          CE->getVarSize().getQuantity(), Flags,
4272                          CE->getLinkage());
4273     } else {
4274       llvm_unreachable("Unsupported entry kind.");
4275     }
4276   }
4277 }
4278 
4279 /// Loads all the offload entries information from the host IR
4280 /// metadata.
4281 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4282   // If we are in target mode, load the metadata from the host IR. This code has
4283   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
4284 
4285   if (!CGM.getLangOpts().OpenMPIsDevice)
4286     return;
4287 
4288   if (CGM.getLangOpts().OMPHostIRFile.empty())
4289     return;
4290 
4291   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4292   if (auto EC = Buf.getError()) {
4293     CGM.getDiags().Report(diag::err_cannot_open_file)
4294         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4295     return;
4296   }
4297 
4298   llvm::LLVMContext C;
4299   auto ME = expectedToErrorOrAndEmitErrors(
4300       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4301 
4302   if (auto EC = ME.getError()) {
4303     unsigned DiagID = CGM.getDiags().getCustomDiagID(
4304         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4305     CGM.getDiags().Report(DiagID)
4306         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4307     return;
4308   }
4309 
4310   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4311   if (!MD)
4312     return;
4313 
4314   for (llvm::MDNode *MN : MD->operands()) {
4315     auto &&GetMDInt = [MN](unsigned Idx) {
4316       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4317       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4318     };
4319 
4320     auto &&GetMDString = [MN](unsigned Idx) {
4321       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4322       return V->getString();
4323     };
4324 
4325     switch (GetMDInt(0)) {
4326     default:
4327       llvm_unreachable("Unexpected metadata!");
4328       break;
4329     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4330         OffloadingEntryInfoTargetRegion:
4331       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4332           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4333           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4334           /*Order=*/GetMDInt(5));
4335       break;
4336     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4337         OffloadingEntryInfoDeviceGlobalVar:
4338       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4339           /*MangledName=*/GetMDString(1),
4340           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4341               /*Flags=*/GetMDInt(2)),
4342           /*Order=*/GetMDInt(3));
4343       break;
4344     }
4345   }
4346 }
4347 
4348 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4349   if (!KmpRoutineEntryPtrTy) {
4350     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4351     ASTContext &C = CGM.getContext();
4352     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4353     FunctionProtoType::ExtProtoInfo EPI;
4354     KmpRoutineEntryPtrQTy = C.getPointerType(
4355         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4356     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4357   }
4358 }
4359 
4360 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4361   // Make sure the type of the entry is already created. This is the type we
4362   // have to create:
4363   // struct __tgt_offload_entry{
4364   //   void      *addr;       // Pointer to the offload entry info.
4365   //                          // (function or global)
4366   //   char      *name;       // Name of the function or global.
4367   //   size_t     size;       // Size of the entry info (0 if it a function).
4368   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4369   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4370   // };
4371   if (TgtOffloadEntryQTy.isNull()) {
4372     ASTContext &C = CGM.getContext();
4373     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4374     RD->startDefinition();
4375     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4376     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4377     addFieldToRecordDecl(C, RD, C.getSizeType());
4378     addFieldToRecordDecl(
4379         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4380     addFieldToRecordDecl(
4381         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4382     RD->completeDefinition();
4383     RD->addAttr(PackedAttr::CreateImplicit(C));
4384     TgtOffloadEntryQTy = C.getRecordType(RD);
4385   }
4386   return TgtOffloadEntryQTy;
4387 }
4388 
4389 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4390   // These are the types we need to build:
4391   // struct __tgt_device_image{
4392   // void   *ImageStart;       // Pointer to the target code start.
4393   // void   *ImageEnd;         // Pointer to the target code end.
4394   // // We also add the host entries to the device image, as it may be useful
4395   // // for the target runtime to have access to that information.
4396   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
4397   //                                       // the entries.
4398   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4399   //                                       // entries (non inclusive).
4400   // };
4401   if (TgtDeviceImageQTy.isNull()) {
4402     ASTContext &C = CGM.getContext();
4403     RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4404     RD->startDefinition();
4405     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4406     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4407     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4408     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4409     RD->completeDefinition();
4410     TgtDeviceImageQTy = C.getRecordType(RD);
4411   }
4412   return TgtDeviceImageQTy;
4413 }
4414 
4415 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4416   // struct __tgt_bin_desc{
4417   //   int32_t              NumDevices;      // Number of devices supported.
4418   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
4419   //                                         // (one per device).
4420   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
4421   //                                         // entries.
4422   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4423   //                                         // entries (non inclusive).
4424   // };
4425   if (TgtBinaryDescriptorQTy.isNull()) {
4426     ASTContext &C = CGM.getContext();
4427     RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4428     RD->startDefinition();
4429     addFieldToRecordDecl(
4430         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4431     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
4432     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4433     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4434     RD->completeDefinition();
4435     TgtBinaryDescriptorQTy = C.getRecordType(RD);
4436   }
4437   return TgtBinaryDescriptorQTy;
4438 }
4439 
4440 namespace {
4441 struct PrivateHelpersTy {
4442   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4443                    const VarDecl *PrivateElemInit)
4444       : Original(Original), PrivateCopy(PrivateCopy),
4445         PrivateElemInit(PrivateElemInit) {}
4446   const VarDecl *Original;
4447   const VarDecl *PrivateCopy;
4448   const VarDecl *PrivateElemInit;
4449 };
4450 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4451 } // anonymous namespace
4452 
4453 static RecordDecl *
4454 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4455   if (!Privates.empty()) {
4456     ASTContext &C = CGM.getContext();
4457     // Build struct .kmp_privates_t. {
4458     //         /*  private vars  */
4459     //       };
4460     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4461     RD->startDefinition();
4462     for (const auto &Pair : Privates) {
4463       const VarDecl *VD = Pair.second.Original;
4464       QualType Type = VD->getType().getNonReferenceType();
4465       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4466       if (VD->hasAttrs()) {
4467         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4468              E(VD->getAttrs().end());
4469              I != E; ++I)
4470           FD->addAttr(*I);
4471       }
4472     }
4473     RD->completeDefinition();
4474     return RD;
4475   }
4476   return nullptr;
4477 }
4478 
4479 static RecordDecl *
4480 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4481                          QualType KmpInt32Ty,
4482                          QualType KmpRoutineEntryPointerQTy) {
4483   ASTContext &C = CGM.getContext();
4484   // Build struct kmp_task_t {
4485   //         void *              shareds;
4486   //         kmp_routine_entry_t routine;
4487   //         kmp_int32           part_id;
4488   //         kmp_cmplrdata_t data1;
4489   //         kmp_cmplrdata_t data2;
4490   // For taskloops additional fields:
4491   //         kmp_uint64          lb;
4492   //         kmp_uint64          ub;
4493   //         kmp_int64           st;
4494   //         kmp_int32           liter;
4495   //         void *              reductions;
4496   //       };
4497   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4498   UD->startDefinition();
4499   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4500   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4501   UD->completeDefinition();
4502   QualType KmpCmplrdataTy = C.getRecordType(UD);
4503   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4504   RD->startDefinition();
4505   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4506   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4507   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4508   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4509   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4510   if (isOpenMPTaskLoopDirective(Kind)) {
4511     QualType KmpUInt64Ty =
4512         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4513     QualType KmpInt64Ty =
4514         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4515     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4516     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4517     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4518     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4519     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4520   }
4521   RD->completeDefinition();
4522   return RD;
4523 }
4524 
4525 static RecordDecl *
4526 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4527                                      ArrayRef<PrivateDataTy> Privates) {
4528   ASTContext &C = CGM.getContext();
4529   // Build struct kmp_task_t_with_privates {
4530   //         kmp_task_t task_data;
4531   //         .kmp_privates_t. privates;
4532   //       };
4533   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4534   RD->startDefinition();
4535   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4536   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4537     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4538   RD->completeDefinition();
4539   return RD;
4540 }
4541 
/// Emit a proxy function which accepts a pointer to the
/// kmp_task_t_with_privates record as the second argument, forwards the
/// relevant pieces of that record to \p TaskFunction and returns 0.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t_with_privates *tt) {
///   TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
///   task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->task_data.reductions,
///   For all tasks:
///   tt->task_data.shareds);
///   return 0;
/// }
/// \endcode
/// A null pointer is passed instead of &tt->privates when the record has no
/// privates field.
///
/// \returns the newly created internal proxy function.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // The proxy takes (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  // The proxy is internal to the module and is marked as non-recursive.
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
  // task_privates_map, tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.reductions,
  // For all tasks:
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase designates *tt; Base designates its first field, the kmp_task_t
  // part.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // The KmpTaskT* constants are used as field indices into kmp_task_t.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  // The part id is passed by address, not by value.
  llvm::Value *PartidParam = PartIdLVal.getPointer();

  // The shareds pointer is loaded and cast to the type given by SharedsPtrTy.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record is the optional second field of the wrapper record;
  // pass its address as void* if present and a null pointer otherwise.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments passed for every kind of task; the last one is tt itself as
  // void*.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally receive the loaded values of the lower bound,
    // upper bound, stride, last-iteration flag and reductions fields.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  // The shareds pointer is always the last argument.
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4656 
4657 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4658                                             SourceLocation Loc,
4659                                             QualType KmpInt32Ty,
4660                                             QualType KmpTaskTWithPrivatesPtrQTy,
4661                                             QualType KmpTaskTWithPrivatesQTy) {
4662   ASTContext &C = CGM.getContext();
4663   FunctionArgList Args;
4664   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4665                             ImplicitParamDecl::Other);
4666   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4667                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4668                                 ImplicitParamDecl::Other);
4669   Args.push_back(&GtidArg);
4670   Args.push_back(&TaskTypeArg);
4671   const auto &DestructorFnInfo =
4672       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4673   llvm::FunctionType *DestructorFnTy =
4674       CGM.getTypes().GetFunctionType(DestructorFnInfo);
4675   std::string Name =
4676       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4677   auto *DestructorFn =
4678       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4679                              Name, &CGM.getModule());
4680   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4681                                     DestructorFnInfo);
4682   DestructorFn->setDoesNotRecurse();
4683   CodeGenFunction CGF(CGM);
4684   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4685                     Args, Loc, Loc);
4686 
4687   LValue Base = CGF.EmitLoadOfPointerLValue(
4688       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4689       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4690   const auto *KmpTaskTWithPrivatesQTyRD =
4691       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4692   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4693   Base = CGF.EmitLValueForField(Base, *FI);
4694   for (const auto *Field :
4695        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4696     if (QualType::DestructionKind DtorKind =
4697             Field->getType().isDestructedType()) {
4698       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4699       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
4700     }
4701   }
4702   CGF.FinishFunction();
4703   return DestructorFn;
4704 }
4705 
4706 /// Emit a privates mapping function for correct handling of private and
4707 /// firstprivate variables.
4708 /// \code
4709 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4710 /// **noalias priv1,...,  <tyn> **noalias privn) {
4711 ///   *priv1 = &.privates.priv1;
4712 ///   ...;
4713 ///   *privn = &.privates.privn;
4714 /// }
4715 /// \endcode
4716 static llvm::Value *
4717 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4718                                ArrayRef<const Expr *> PrivateVars,
4719                                ArrayRef<const Expr *> FirstprivateVars,
4720                                ArrayRef<const Expr *> LastprivateVars,
4721                                QualType PrivatesQTy,
4722                                ArrayRef<PrivateDataTy> Privates) {
4723   ASTContext &C = CGM.getContext();
4724   FunctionArgList Args;
4725   ImplicitParamDecl TaskPrivatesArg(
4726       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4727       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4728       ImplicitParamDecl::Other);
4729   Args.push_back(&TaskPrivatesArg);
4730   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4731   unsigned Counter = 1;
4732   for (const Expr *E : PrivateVars) {
4733     Args.push_back(ImplicitParamDecl::Create(
4734         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4735         C.getPointerType(C.getPointerType(E->getType()))
4736             .withConst()
4737             .withRestrict(),
4738         ImplicitParamDecl::Other));
4739     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4740     PrivateVarsPos[VD] = Counter;
4741     ++Counter;
4742   }
4743   for (const Expr *E : FirstprivateVars) {
4744     Args.push_back(ImplicitParamDecl::Create(
4745         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4746         C.getPointerType(C.getPointerType(E->getType()))
4747             .withConst()
4748             .withRestrict(),
4749         ImplicitParamDecl::Other));
4750     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4751     PrivateVarsPos[VD] = Counter;
4752     ++Counter;
4753   }
4754   for (const Expr *E : LastprivateVars) {
4755     Args.push_back(ImplicitParamDecl::Create(
4756         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4757         C.getPointerType(C.getPointerType(E->getType()))
4758             .withConst()
4759             .withRestrict(),
4760         ImplicitParamDecl::Other));
4761     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4762     PrivateVarsPos[VD] = Counter;
4763     ++Counter;
4764   }
4765   const auto &TaskPrivatesMapFnInfo =
4766       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4767   llvm::FunctionType *TaskPrivatesMapTy =
4768       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4769   std::string Name =
4770       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4771   auto *TaskPrivatesMap = llvm::Function::Create(
4772       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4773       &CGM.getModule());
4774   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4775                                     TaskPrivatesMapFnInfo);
4776   if (CGM.getLangOpts().Optimize) {
4777     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4778     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4779     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4780   }
4781   CodeGenFunction CGF(CGM);
4782   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4783                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4784 
4785   // *privi = &.privates.privi;
4786   LValue Base = CGF.EmitLoadOfPointerLValue(
4787       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4788       TaskPrivatesArg.getType()->castAs<PointerType>());
4789   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4790   Counter = 0;
4791   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4792     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4793     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4794     LValue RefLVal =
4795         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4796     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4797         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4798     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4799     ++Counter;
4800   }
4801   CGF.FinishFunction();
4802   return TaskPrivatesMap;
4803 }
4804 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lockstep with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD, reached through \p TDBase) and
/// emits the initializer of each private copy into its field.
///
/// \param KmpTaskSharedsPtr Address of the task's shareds block; used as the
///        source for firstprivate copies. May be invalid when no source is
///        needed.
/// \param TDBase LValue of the task-with-privates record being initialized.
/// \param SharedsTy Type of the shareds record.
/// \param SharedsPtrTy Pointer type to \p SharedsTy, used to cast
///        \p KmpTaskSharedsPtr.
/// \param ForDup When true, only private copies whose initializer is a
///        non-trivial constructor call are (re)initialized; all other entries
///        are skipped.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of the task-with-privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Pick the captured statement that belongs to the task/taskloop region; its
  // capture info is used below to find the shareds field of each variable.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    // View the shareds pointer as an lvalue of the shareds record so that
    // individual captured fields can be addressed.
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Re-point FI at the first field of the privates record; it is advanced once
  // per entry of Privates at the bottom of the loop.
  // NOTE(review): assumes the privates record fields are laid out in the same
  // order as Privates - confirm against the record-building helper.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Without an initializer nothing is emitted. In the dup case only
    // non-trivial constructor initializers are emitted.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit is the helper variable through which Init
      // reads the source value; it is mapped to the source address below.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          // Not captured: read the source directly from the local variable.
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          // Captured: address the variable's field in the shareds record and
          // rebuild the lvalue with the declared alignment of the original
          // variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Make Elem refer to the current source element while Init
                  // is emitted.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array copy: make Elem refer to the source variable and emit
          // Init directly into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No source element helper: the initializer does not read from the
        // shareds, so emit it as-is.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    // Advance to the field of the next private copy, whether or not this one
    // was initialized.
    ++FI;
  }
}
4909 
4910 /// Check if duplication function is required for taskloops.
4911 static bool checkInitIsRequired(CodeGenFunction &CGF,
4912                                 ArrayRef<PrivateDataTy> Privates) {
4913   bool InitRequired = false;
4914   for (const PrivateDataTy &Pair : Privates) {
4915     const VarDecl *VD = Pair.second.PrivateCopy;
4916     const Expr *Init = VD->getAnyInitializer();
4917     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4918                                     !CGF.isTrivialInitializer(Init));
4919     if (InitRequired)
4920       break;
4921   }
4922   return InitRequired;
4923 }
4924 
4925 
4926 /// Emit task_dup function (for initialization of
4927 /// private/firstprivate/lastprivate vars and last_iter flag)
4928 /// \code
4929 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4930 /// lastpriv) {
4931 /// // setup lastprivate flag
4932 ///    task_dst->last = lastpriv;
4933 /// // could be constructor calls here...
4934 /// }
4935 /// \endcode
4936 static llvm::Value *
4937 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4938                     const OMPExecutableDirective &D,
4939                     QualType KmpTaskTWithPrivatesPtrQTy,
4940                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4941                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4942                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4943                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4944   ASTContext &C = CGM.getContext();
4945   FunctionArgList Args;
4946   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4947                            KmpTaskTWithPrivatesPtrQTy,
4948                            ImplicitParamDecl::Other);
4949   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4950                            KmpTaskTWithPrivatesPtrQTy,
4951                            ImplicitParamDecl::Other);
4952   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4953                                 ImplicitParamDecl::Other);
4954   Args.push_back(&DstArg);
4955   Args.push_back(&SrcArg);
4956   Args.push_back(&LastprivArg);
4957   const auto &TaskDupFnInfo =
4958       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4959   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4960   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4961   auto *TaskDup = llvm::Function::Create(
4962       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4963   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4964   TaskDup->setDoesNotRecurse();
4965   CodeGenFunction CGF(CGM);
4966   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4967                     Loc);
4968 
4969   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4970       CGF.GetAddrOfLocalVar(&DstArg),
4971       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4972   // task_dst->liter = lastpriv;
4973   if (WithLastIter) {
4974     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4975     LValue Base = CGF.EmitLValueForField(
4976         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4977     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4978     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4979         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4980     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4981   }
4982 
4983   // Emit initial values for private copies (if any).
4984   assert(!Privates.empty());
4985   Address KmpTaskSharedsPtr = Address::invalid();
4986   if (!Data.FirstprivateVars.empty()) {
4987     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4988         CGF.GetAddrOfLocalVar(&SrcArg),
4989         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4990     LValue Base = CGF.EmitLValueForField(
4991         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4992     KmpTaskSharedsPtr = Address(
4993         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4994                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4995                                                   KmpTaskTShareds)),
4996                              Loc),
4997         CGF.getNaturalTypeAlignment(SharedsTy));
4998   }
4999   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
5000                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
5001   CGF.FinishFunction();
5002   return TaskDup;
5003 }
5004 
5005 /// Checks if destructor function is required to be generated.
5006 /// \return true if cleanups are required, false otherwise.
5007 static bool
5008 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
5009   bool NeedsCleanup = false;
5010   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
5011   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
5012   for (const FieldDecl *FD : PrivateRD->fields()) {
5013     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
5014     if (NeedsCleanup)
5015       break;
5016   }
5017   return NeedsCleanup;
5018 }
5019 
/// Emit the allocation and initialization of the runtime task descriptor for
/// directive \p D.
///
/// In order, this: collects the private, firstprivate and lastprivate copies
/// from \p Data; builds the task-with-privates record type; emits the privates
/// mapping function and the proxy task entry; emits the call to the task
/// allocation runtime function; copies \p Shareds into the new task; emits the
/// initialization of the private copies; and, where needed, emits the task
/// duplication and destructor helper functions and stores the priority.
///
/// \param TaskFunction Outlined task function; the type of its fourth
///        parameter is used as the type of the privates mapping function.
/// \param SharedsTy Record type of the captured shared variables.
/// \param Shareds Address of the shareds record to copy into the task.
/// \returns a TaskResultTy holding the allocated task, the proxy entry
/// function, the task pointer cast to the with-privates type, the lvalue of
/// the embedded kmp_task_t, its record declaration and, when emitted, the
/// task duplication function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Each entry pairs the declared alignment of the original variable with the
  // original variable, its private copy and (firstprivates only) the helper
  // variable used by the copy's initializer.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Order by decreasing alignment; the stable sort keeps the insertion order
  // (private, firstprivate, lastprivate) among equally aligned entries.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop directives and all other task-generating directives each use
  // their own cached record type.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates mapping function is passed as the fourth argument of the
  // outlined task function, so take the expected type from there.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null mapping function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  // Flags known at compile time are collected in Flags; the 'final' flag is
  // handled separately below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // If Data.Final carries a value, select the final flag on it in the emitted
  // code; otherwise use its integer part as a compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a 'nowait' clause the target task allocation entry point is used; it
  // takes the device ID as an extra trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    // Note: this local 'C' shadows the ASTContext 'C' inside the condition.
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  // View the returned task pointer as a pointer to the task-with-privates
  // record. Base is that record; TDBase is its first field, the kmp_task_t.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    // Load the shareds pointer stored in the new task and copy the caller's
    // shareds record into the memory it points to.
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    // Pass the task-with-privates lvalue (Base), not TDBase: the privates
    // record is a field of the outer record.
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops get a task duplication function when there are lastprivates or
    // when some private copy needs a non-trivial constructor call.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  // NOTE(review): Data1/Data2 are field indices declared outside this
  // function - presumably the two kmp_cmplrdata_t members of kmp_task_t.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    // The union declaration was taken from the Data1 field above and is reused
    // here for Data2; presumably both fields have the same union type.
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
5239 
5240 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5241                                    const OMPExecutableDirective &D,
5242                                    llvm::Function *TaskFunction,
5243                                    QualType SharedsTy, Address Shareds,
5244                                    const Expr *IfCond,
5245                                    const OMPTaskDataTy &Data) {
5246   if (!CGF.HaveInsertPoint())
5247     return;
5248 
5249   TaskResultTy Result =
5250       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5251   llvm::Value *NewTask = Result.NewTask;
5252   llvm::Function *TaskEntry = Result.TaskEntry;
5253   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5254   LValue TDBase = Result.TDBase;
5255   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5256   ASTContext &C = CGM.getContext();
5257   // Process list of dependences.
5258   Address DependenciesArray = Address::invalid();
5259   unsigned NumDependencies = Data.Dependences.size();
5260   if (NumDependencies) {
5261     // Dependence kind for RTL.
5262     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
5263     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
5264     RecordDecl *KmpDependInfoRD;
5265     QualType FlagsTy =
5266         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5267     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5268     if (KmpDependInfoTy.isNull()) {
5269       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5270       KmpDependInfoRD->startDefinition();
5271       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5272       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5273       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5274       KmpDependInfoRD->completeDefinition();
5275       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5276     } else {
5277       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5278     }
5279     // Define type kmp_depend_info[<Dependences.size()>];
5280     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5281         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
5282         ArrayType::Normal, /*IndexTypeQuals=*/0);
5283     // kmp_depend_info[<Dependences.size()>] deps;
5284     DependenciesArray =
5285         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5286     for (unsigned I = 0; I < NumDependencies; ++I) {
5287       const Expr *E = Data.Dependences[I].second;
5288       LValue Addr = CGF.EmitLValue(E);
5289       llvm::Value *Size;
5290       QualType Ty = E->getType();
5291       if (const auto *ASE =
5292               dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5293         LValue UpAddrLVal =
5294             CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
5295         llvm::Value *UpAddr =
5296             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
5297         llvm::Value *LowIntPtr =
5298             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
5299         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5300         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5301       } else {
5302         Size = CGF.getTypeSize(Ty);
5303       }
5304       LValue Base = CGF.MakeAddrLValue(
5305           CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
5306           KmpDependInfoTy);
5307       // deps[i].base_addr = &<Dependences[i].second>;
5308       LValue BaseAddrLVal = CGF.EmitLValueForField(
5309           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5310       CGF.EmitStoreOfScalar(
5311           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
5312           BaseAddrLVal);
5313       // deps[i].len = sizeof(<Dependences[i].second>);
5314       LValue LenLVal = CGF.EmitLValueForField(
5315           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5316       CGF.EmitStoreOfScalar(Size, LenLVal);
5317       // deps[i].flags = <Dependences[i].first>;
5318       RTLDependenceKindTy DepKind;
5319       switch (Data.Dependences[I].first) {
5320       case OMPC_DEPEND_in:
5321         DepKind = DepIn;
5322         break;
5323       // Out and InOut dependencies must use the same code.
5324       case OMPC_DEPEND_out:
5325       case OMPC_DEPEND_inout:
5326         DepKind = DepInOut;
5327         break;
5328       case OMPC_DEPEND_mutexinoutset:
5329         DepKind = DepMutexInOutSet;
5330         break;
5331       case OMPC_DEPEND_source:
5332       case OMPC_DEPEND_sink:
5333       case OMPC_DEPEND_unknown:
5334         llvm_unreachable("Unknown task dependence type");
5335       }
5336       LValue FlagsLVal = CGF.EmitLValueForField(
5337           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5338       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5339                             FlagsLVal);
5340     }
5341     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5342         CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
5343   }
5344 
5345   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5346   // libcall.
5347   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5348   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5349   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5350   // list is not empty
5351   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5352   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5353   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5354   llvm::Value *DepTaskArgs[7];
5355   if (NumDependencies) {
5356     DepTaskArgs[0] = UpLoc;
5357     DepTaskArgs[1] = ThreadID;
5358     DepTaskArgs[2] = NewTask;
5359     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
5360     DepTaskArgs[4] = DependenciesArray.getPointer();
5361     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5362     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5363   }
5364   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
5365                         &TaskArgs,
5366                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5367     if (!Data.Tied) {
5368       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5369       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5370       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5371     }
5372     if (NumDependencies) {
5373       CGF.EmitRuntimeCall(
5374           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5375     } else {
5376       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5377                           TaskArgs);
5378     }
5379     // Check if parent region is untied and build return for untied task;
5380     if (auto *Region =
5381             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5382       Region->emitUntiedSwitch(CGF);
5383   };
5384 
5385   llvm::Value *DepWaitTaskArgs[6];
5386   if (NumDependencies) {
5387     DepWaitTaskArgs[0] = UpLoc;
5388     DepWaitTaskArgs[1] = ThreadID;
5389     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
5390     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5391     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5392     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5393   }
5394   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5395                         NumDependencies, &DepWaitTaskArgs,
5396                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5397     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5398     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5399     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5400     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5401     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5402     // is specified.
5403     if (NumDependencies)
5404       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5405                           DepWaitTaskArgs);
5406     // Call proxy_task_entry(gtid, new_task);
5407     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5408                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5409       Action.Enter(CGF);
5410       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5411       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5412                                                           OutlinedFnArgs);
5413     };
5414 
5415     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5416     // kmp_task_t *new_task);
5417     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5418     // kmp_task_t *new_task);
5419     RegionCodeGenTy RCG(CodeGen);
5420     CommonActionTy Action(
5421         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5422         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5423     RCG.setAction(Action);
5424     RCG(CGF);
5425   };
5426 
5427   if (IfCond) {
5428     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5429   } else {
5430     RegionCodeGenTy ThenRCG(ThenCodeGen);
5431     ThenRCG(CGF);
5432   }
5433 }
5434 
5435 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5436                                        const OMPLoopDirective &D,
5437                                        llvm::Function *TaskFunction,
5438                                        QualType SharedsTy, Address Shareds,
5439                                        const Expr *IfCond,
5440                                        const OMPTaskDataTy &Data) {
5441   if (!CGF.HaveInsertPoint())
5442     return;
5443   TaskResultTy Result =
5444       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5445   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5446   // libcall.
5447   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5448   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5449   // sched, kmp_uint64 grainsize, void *task_dup);
5450   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5451   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5452   llvm::Value *IfVal;
5453   if (IfCond) {
5454     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5455                                       /*isSigned=*/true);
5456   } else {
5457     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5458   }
5459 
5460   LValue LBLVal = CGF.EmitLValueForField(
5461       Result.TDBase,
5462       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5463   const auto *LBVar =
5464       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5465   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
5466                        /*IsInitializer=*/true);
5467   LValue UBLVal = CGF.EmitLValueForField(
5468       Result.TDBase,
5469       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5470   const auto *UBVar =
5471       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5472   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
5473                        /*IsInitializer=*/true);
5474   LValue StLVal = CGF.EmitLValueForField(
5475       Result.TDBase,
5476       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5477   const auto *StVar =
5478       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5479   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
5480                        /*IsInitializer=*/true);
5481   // Store reductions address.
5482   LValue RedLVal = CGF.EmitLValueForField(
5483       Result.TDBase,
5484       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5485   if (Data.Reductions) {
5486     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5487   } else {
5488     CGF.EmitNullInitialization(RedLVal.getAddress(),
5489                                CGF.getContext().VoidPtrTy);
5490   }
5491   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5492   llvm::Value *TaskArgs[] = {
5493       UpLoc,
5494       ThreadID,
5495       Result.NewTask,
5496       IfVal,
5497       LBLVal.getPointer(),
5498       UBLVal.getPointer(),
5499       CGF.EmitLoadOfScalar(StLVal, Loc),
5500       llvm::ConstantInt::getSigned(
5501               CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5502       llvm::ConstantInt::getSigned(
5503           CGF.IntTy, Data.Schedule.getPointer()
5504                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5505                          : NoSchedule),
5506       Data.Schedule.getPointer()
5507           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5508                                       /*isSigned=*/false)
5509           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5510       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5511                              Result.TaskDupFn, CGF.VoidPtrTy)
5512                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5513   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
5514 }
5515 
5516 /// Emit reduction operation for each element of array (required for
5517 /// array sections) LHS op = RHS.
5518 /// \param Type Type of array.
5519 /// \param LHSVar Variable on the left side of the reduction operation
5520 /// (references element of array in original variable).
5521 /// \param RHSVar Variable on the right side of the reduction operation
5522 /// (references element of array in original variable).
5523 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5524 /// RHSVar.
5525 static void EmitOMPAggregateReduction(
5526     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5527     const VarDecl *RHSVar,
5528     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5529                                   const Expr *, const Expr *)> &RedOpGen,
5530     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5531     const Expr *UpExpr = nullptr) {
5532   // Perform element-by-element initialization.
5533   QualType ElementTy;
5534   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5535   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5536 
5537   // Drill down to the base element type on both arrays.
5538   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5539   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5540 
5541   llvm::Value *RHSBegin = RHSAddr.getPointer();
5542   llvm::Value *LHSBegin = LHSAddr.getPointer();
5543   // Cast from pointer to array type to pointer to single element.
5544   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5545   // The basic structure here is a while-do loop.
5546   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5547   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5548   llvm::Value *IsEmpty =
5549       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5550   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5551 
5552   // Enter the loop body, making that address the current address.
5553   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5554   CGF.EmitBlock(BodyBB);
5555 
5556   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5557 
5558   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5559       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5560   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5561   Address RHSElementCurrent =
5562       Address(RHSElementPHI,
5563               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5564 
5565   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5566       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5567   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5568   Address LHSElementCurrent =
5569       Address(LHSElementPHI,
5570               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5571 
5572   // Emit copy.
5573   CodeGenFunction::OMPPrivateScope Scope(CGF);
5574   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5575   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5576   Scope.Privatize();
5577   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5578   Scope.ForceCleanup();
5579 
5580   // Shift the address forward by one element.
5581   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5582       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5583   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5584       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5585   // Check whether we've reached the end.
5586   llvm::Value *Done =
5587       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5588   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5589   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5590   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5591 
5592   // Done.
5593   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5594 }
5595 
5596 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5597 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5598 /// UDR combiner function.
5599 static void emitReductionCombiner(CodeGenFunction &CGF,
5600                                   const Expr *ReductionOp) {
5601   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5602     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5603       if (const auto *DRE =
5604               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5605         if (const auto *DRD =
5606                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5607           std::pair<llvm::Function *, llvm::Function *> Reduction =
5608               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5609           RValue Func = RValue::get(Reduction.first);
5610           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5611           CGF.EmitIgnoredExpr(ReductionOp);
5612           return;
5613         }
5614   CGF.EmitIgnoredExpr(ReductionOp);
5615 }
5616 
5617 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5618     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5619     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5620     ArrayRef<const Expr *> ReductionOps) {
5621   ASTContext &C = CGM.getContext();
5622 
5623   // void reduction_func(void *LHSArg, void *RHSArg);
5624   FunctionArgList Args;
5625   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5626                            ImplicitParamDecl::Other);
5627   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5628                            ImplicitParamDecl::Other);
5629   Args.push_back(&LHSArg);
5630   Args.push_back(&RHSArg);
5631   const auto &CGFI =
5632       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5633   std::string Name = getName({"omp", "reduction", "reduction_func"});
5634   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5635                                     llvm::GlobalValue::InternalLinkage, Name,
5636                                     &CGM.getModule());
5637   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5638   Fn->setDoesNotRecurse();
5639   CodeGenFunction CGF(CGM);
5640   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5641 
5642   // Dst = (void*[n])(LHSArg);
5643   // Src = (void*[n])(RHSArg);
5644   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5645       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5646       ArgsType), CGF.getPointerAlign());
5647   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5648       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5649       ArgsType), CGF.getPointerAlign());
5650 
5651   //  ...
5652   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5653   //  ...
5654   CodeGenFunction::OMPPrivateScope Scope(CGF);
5655   auto IPriv = Privates.begin();
5656   unsigned Idx = 0;
5657   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5658     const auto *RHSVar =
5659         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5660     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5661       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5662     });
5663     const auto *LHSVar =
5664         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5665     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5666       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5667     });
5668     QualType PrivTy = (*IPriv)->getType();
5669     if (PrivTy->isVariablyModifiedType()) {
5670       // Get array size and emit VLA type.
5671       ++Idx;
5672       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5673       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5674       const VariableArrayType *VLA =
5675           CGF.getContext().getAsVariableArrayType(PrivTy);
5676       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5677       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5678           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5679       CGF.EmitVariablyModifiedType(PrivTy);
5680     }
5681   }
5682   Scope.Privatize();
5683   IPriv = Privates.begin();
5684   auto ILHS = LHSExprs.begin();
5685   auto IRHS = RHSExprs.begin();
5686   for (const Expr *E : ReductionOps) {
5687     if ((*IPriv)->getType()->isArrayType()) {
5688       // Emit reduction for array section.
5689       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5690       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5691       EmitOMPAggregateReduction(
5692           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5693           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5694             emitReductionCombiner(CGF, E);
5695           });
5696     } else {
5697       // Emit reduction for array subscript or single variable.
5698       emitReductionCombiner(CGF, E);
5699     }
5700     ++IPriv;
5701     ++ILHS;
5702     ++IRHS;
5703   }
5704   Scope.ForceCleanup();
5705   CGF.FinishFunction();
5706   return Fn;
5707 }
5708 
5709 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5710                                                   const Expr *ReductionOp,
5711                                                   const Expr *PrivateRef,
5712                                                   const DeclRefExpr *LHS,
5713                                                   const DeclRefExpr *RHS) {
5714   if (PrivateRef->getType()->isArrayType()) {
5715     // Emit reduction for array section.
5716     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5717     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5718     EmitOMPAggregateReduction(
5719         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5720         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5721           emitReductionCombiner(CGF, ReductionOp);
5722         });
5723   } else {
5724     // Emit reduction for array subscript or single variable.
5725     emitReductionCombiner(CGF, ReductionOp);
5726   }
5727 }
5728 
5729 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5730                                     ArrayRef<const Expr *> Privates,
5731                                     ArrayRef<const Expr *> LHSExprs,
5732                                     ArrayRef<const Expr *> RHSExprs,
5733                                     ArrayRef<const Expr *> ReductionOps,
5734                                     ReductionOptionsTy Options) {
5735   if (!CGF.HaveInsertPoint())
5736     return;
5737 
5738   bool WithNowait = Options.WithNowait;
5739   bool SimpleReduction = Options.SimpleReduction;
5740 
5741   // Next code should be emitted for reduction:
5742   //
5743   // static kmp_critical_name lock = { 0 };
5744   //
5745   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5746   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5747   //  ...
5748   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5749   //  *(Type<n>-1*)rhs[<n>-1]);
5750   // }
5751   //
5752   // ...
5753   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5754   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5755   // RedList, reduce_func, &<lock>)) {
5756   // case 1:
5757   //  ...
5758   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5759   //  ...
5760   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5761   // break;
5762   // case 2:
5763   //  ...
5764   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5765   //  ...
5766   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5767   // break;
5768   // default:;
5769   // }
5770   //
5771   // if SimpleReduction is true, only the next code is generated:
5772   //  ...
5773   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5774   //  ...
5775 
5776   ASTContext &C = CGM.getContext();
5777 
5778   if (SimpleReduction) {
5779     CodeGenFunction::RunCleanupsScope Scope(CGF);
5780     auto IPriv = Privates.begin();
5781     auto ILHS = LHSExprs.begin();
5782     auto IRHS = RHSExprs.begin();
5783     for (const Expr *E : ReductionOps) {
5784       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5785                                   cast<DeclRefExpr>(*IRHS));
5786       ++IPriv;
5787       ++ILHS;
5788       ++IRHS;
5789     }
5790     return;
5791   }
5792 
5793   // 1. Build a list of reduction variables.
5794   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5795   auto Size = RHSExprs.size();
5796   for (const Expr *E : Privates) {
5797     if (E->getType()->isVariablyModifiedType())
5798       // Reserve place for array size.
5799       ++Size;
5800   }
5801   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5802   QualType ReductionArrayTy =
5803       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
5804                              /*IndexTypeQuals=*/0);
5805   Address ReductionList =
5806       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5807   auto IPriv = Privates.begin();
5808   unsigned Idx = 0;
5809   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5810     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5811     CGF.Builder.CreateStore(
5812         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5813             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
5814         Elem);
5815     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5816       // Store array size.
5817       ++Idx;
5818       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5819       llvm::Value *Size = CGF.Builder.CreateIntCast(
5820           CGF.getVLASize(
5821                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5822               .NumElts,
5823           CGF.SizeTy, /*isSigned=*/false);
5824       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5825                               Elem);
5826     }
5827   }
5828 
5829   // 2. Emit reduce_func().
5830   llvm::Function *ReductionFn = emitReductionFunction(
5831       Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5832       LHSExprs, RHSExprs, ReductionOps);
5833 
5834   // 3. Create static kmp_critical_name lock = { 0 };
5835   std::string Name = getName({"reduction"});
5836   llvm::Value *Lock = getCriticalRegionLock(Name);
5837 
5838   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5839   // RedList, reduce_func, &<lock>);
5840   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5841   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5842   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5843   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5844       ReductionList.getPointer(), CGF.VoidPtrTy);
5845   llvm::Value *Args[] = {
5846       IdentTLoc,                             // ident_t *<loc>
5847       ThreadId,                              // i32 <gtid>
5848       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5849       ReductionArrayTySize,                  // size_type sizeof(RedList)
5850       RL,                                    // void *RedList
5851       ReductionFn, // void (*) (void *, void *) <reduce_func>
5852       Lock         // kmp_critical_name *&<lock>
5853   };
5854   llvm::Value *Res = CGF.EmitRuntimeCall(
5855       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
5856                                        : OMPRTL__kmpc_reduce),
5857       Args);
5858 
5859   // 5. Build switch(res)
5860   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5861   llvm::SwitchInst *SwInst =
5862       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5863 
5864   // 6. Build case 1:
5865   //  ...
5866   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5867   //  ...
5868   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5869   // break;
5870   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5871   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5872   CGF.EmitBlock(Case1BB);
5873 
5874   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5875   llvm::Value *EndArgs[] = {
5876       IdentTLoc, // ident_t *<loc>
5877       ThreadId,  // i32 <gtid>
5878       Lock       // kmp_critical_name *&<lock>
5879   };
5880   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5881                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5882     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5883     auto IPriv = Privates.begin();
5884     auto ILHS = LHSExprs.begin();
5885     auto IRHS = RHSExprs.begin();
5886     for (const Expr *E : ReductionOps) {
5887       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5888                                      cast<DeclRefExpr>(*IRHS));
5889       ++IPriv;
5890       ++ILHS;
5891       ++IRHS;
5892     }
5893   };
5894   RegionCodeGenTy RCG(CodeGen);
5895   CommonActionTy Action(
5896       nullptr, llvm::None,
5897       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
5898                                        : OMPRTL__kmpc_end_reduce),
5899       EndArgs);
5900   RCG.setAction(Action);
5901   RCG(CGF);
5902 
5903   CGF.EmitBranch(DefaultBB);
5904 
5905   // 7. Build case 2:
5906   //  ...
5907   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5908   //  ...
5909   // break;
5910   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5911   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5912   CGF.EmitBlock(Case2BB);
5913 
5914   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5915                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5916     auto ILHS = LHSExprs.begin();
5917     auto IRHS = RHSExprs.begin();
5918     auto IPriv = Privates.begin();
5919     for (const Expr *E : ReductionOps) {
5920       const Expr *XExpr = nullptr;
5921       const Expr *EExpr = nullptr;
5922       const Expr *UpExpr = nullptr;
5923       BinaryOperatorKind BO = BO_Comma;
5924       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5925         if (BO->getOpcode() == BO_Assign) {
5926           XExpr = BO->getLHS();
5927           UpExpr = BO->getRHS();
5928         }
5929       }
5930       // Try to emit update expression as a simple atomic.
5931       const Expr *RHSExpr = UpExpr;
5932       if (RHSExpr) {
5933         // Analyze RHS part of the whole expression.
5934         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5935                 RHSExpr->IgnoreParenImpCasts())) {
5936           // If this is a conditional operator, analyze its condition for
5937           // min/max reduction operator.
5938           RHSExpr = ACO->getCond();
5939         }
5940         if (const auto *BORHS =
5941                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5942           EExpr = BORHS->getRHS();
5943           BO = BORHS->getOpcode();
5944         }
5945       }
5946       if (XExpr) {
5947         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5948         auto &&AtomicRedGen = [BO, VD,
5949                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5950                                     const Expr *EExpr, const Expr *UpExpr) {
5951           LValue X = CGF.EmitLValue(XExpr);
5952           RValue E;
5953           if (EExpr)
5954             E = CGF.EmitAnyExpr(EExpr);
5955           CGF.EmitOMPAtomicSimpleUpdateExpr(
5956               X, E, BO, /*IsXLHSInRHSPart=*/true,
5957               llvm::AtomicOrdering::Monotonic, Loc,
5958               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5959                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5960                 PrivateScope.addPrivate(
5961                     VD, [&CGF, VD, XRValue, Loc]() {
5962                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5963                       CGF.emitOMPSimpleStore(
5964                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5965                           VD->getType().getNonReferenceType(), Loc);
5966                       return LHSTemp;
5967                     });
5968                 (void)PrivateScope.Privatize();
5969                 return CGF.EmitAnyExpr(UpExpr);
5970               });
5971         };
5972         if ((*IPriv)->getType()->isArrayType()) {
5973           // Emit atomic reduction for array section.
5974           const auto *RHSVar =
5975               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5976           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5977                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5978         } else {
5979           // Emit atomic reduction for array subscript or single variable.
5980           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5981         }
5982       } else {
5983         // Emit as a critical region.
5984         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5985                                            const Expr *, const Expr *) {
5986           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5987           std::string Name = RT.getName({"atomic_reduction"});
5988           RT.emitCriticalRegion(
5989               CGF, Name,
5990               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5991                 Action.Enter(CGF);
5992                 emitReductionCombiner(CGF, E);
5993               },
5994               Loc);
5995         };
5996         if ((*IPriv)->getType()->isArrayType()) {
5997           const auto *LHSVar =
5998               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5999           const auto *RHSVar =
6000               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
6001           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
6002                                     CritRedGen);
6003         } else {
6004           CritRedGen(CGF, nullptr, nullptr, nullptr);
6005         }
6006       }
6007       ++ILHS;
6008       ++IRHS;
6009       ++IPriv;
6010     }
6011   };
6012   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
6013   if (!WithNowait) {
6014     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
6015     llvm::Value *EndArgs[] = {
6016         IdentTLoc, // ident_t *<loc>
6017         ThreadId,  // i32 <gtid>
6018         Lock       // kmp_critical_name *&<lock>
6019     };
6020     CommonActionTy Action(nullptr, llvm::None,
6021                           createRuntimeFunction(OMPRTL__kmpc_end_reduce),
6022                           EndArgs);
6023     AtomicRCG.setAction(Action);
6024     AtomicRCG(CGF);
6025   } else {
6026     AtomicRCG(CGF);
6027   }
6028 
6029   CGF.EmitBranch(DefaultBB);
6030   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
6031 }
6032 
6033 /// Generates unique name for artificial threadprivate variables.
6034 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
6035 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
6036                                       const Expr *Ref) {
6037   SmallString<256> Buffer;
6038   llvm::raw_svector_ostream Out(Buffer);
6039   const clang::DeclRefExpr *DE;
6040   const VarDecl *D = ::getBaseDecl(Ref, DE);
6041   if (!D)
6042     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
6043   D = D->getCanonicalDecl();
6044   std::string Name = CGM.getOpenMPRuntime().getName(
6045       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
6046   Out << Prefix << Name << "_"
6047       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
6048   return Out.str();
6049 }
6050 
6051 /// Emits reduction initializer function:
6052 /// \code
6053 /// void @.red_init(void* %arg) {
6054 /// %0 = bitcast void* %arg to <type>*
6055 /// store <type> <init>, <type>* %0
6056 /// ret void
6057 /// }
6058 /// \endcode
6059 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
6060                                            SourceLocation Loc,
6061                                            ReductionCodeGen &RCG, unsigned N) {
6062   ASTContext &C = CGM.getContext();
6063   FunctionArgList Args;
6064   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6065                           ImplicitParamDecl::Other);
6066   Args.emplace_back(&Param);
6067   const auto &FnInfo =
6068       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6069   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6070   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
6071   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6072                                     Name, &CGM.getModule());
6073   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6074   Fn->setDoesNotRecurse();
6075   CodeGenFunction CGF(CGM);
6076   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6077   Address PrivateAddr = CGF.EmitLoadOfPointer(
6078       CGF.GetAddrOfLocalVar(&Param),
6079       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6080   llvm::Value *Size = nullptr;
6081   // If the size of the reduction item is non-constant, load it from global
6082   // threadprivate variable.
6083   if (RCG.getSizes(N).second) {
6084     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6085         CGF, CGM.getContext().getSizeType(),
6086         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6087     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6088                                 CGM.getContext().getSizeType(), Loc);
6089   }
6090   RCG.emitAggregateType(CGF, N, Size);
6091   LValue SharedLVal;
6092   // If initializer uses initializer from declare reduction construct, emit a
6093   // pointer to the address of the original reduction item (reuired by reduction
6094   // initializer)
6095   if (RCG.usesReductionInitializer(N)) {
6096     Address SharedAddr =
6097         CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6098             CGF, CGM.getContext().VoidPtrTy,
6099             generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6100     SharedAddr = CGF.EmitLoadOfPointer(
6101         SharedAddr,
6102         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
6103     SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
6104   } else {
6105     SharedLVal = CGF.MakeNaturalAlignAddrLValue(
6106         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
6107         CGM.getContext().VoidPtrTy);
6108   }
6109   // Emit the initializer:
6110   // %0 = bitcast void* %arg to <type>*
6111   // store <type> <init>, <type>* %0
6112   RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
6113                          [](CodeGenFunction &) { return false; });
6114   CGF.FinishFunction();
6115   return Fn;
6116 }
6117 
6118 /// Emits reduction combiner function:
6119 /// \code
6120 /// void @.red_comb(void* %arg0, void* %arg1) {
6121 /// %lhs = bitcast void* %arg0 to <type>*
6122 /// %rhs = bitcast void* %arg1 to <type>*
6123 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
6124 /// store <type> %2, <type>* %lhs
6125 /// ret void
6126 /// }
6127 /// \endcode
6128 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
6129                                            SourceLocation Loc,
6130                                            ReductionCodeGen &RCG, unsigned N,
6131                                            const Expr *ReductionOp,
6132                                            const Expr *LHS, const Expr *RHS,
6133                                            const Expr *PrivateRef) {
6134   ASTContext &C = CGM.getContext();
6135   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
6136   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
6137   FunctionArgList Args;
6138   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
6139                                C.VoidPtrTy, ImplicitParamDecl::Other);
6140   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6141                             ImplicitParamDecl::Other);
6142   Args.emplace_back(&ParamInOut);
6143   Args.emplace_back(&ParamIn);
6144   const auto &FnInfo =
6145       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6146   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6147   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
6148   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6149                                     Name, &CGM.getModule());
6150   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6151   Fn->setDoesNotRecurse();
6152   CodeGenFunction CGF(CGM);
6153   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6154   llvm::Value *Size = nullptr;
6155   // If the size of the reduction item is non-constant, load it from global
6156   // threadprivate variable.
6157   if (RCG.getSizes(N).second) {
6158     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6159         CGF, CGM.getContext().getSizeType(),
6160         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6161     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6162                                 CGM.getContext().getSizeType(), Loc);
6163   }
6164   RCG.emitAggregateType(CGF, N, Size);
6165   // Remap lhs and rhs variables to the addresses of the function arguments.
6166   // %lhs = bitcast void* %arg0 to <type>*
6167   // %rhs = bitcast void* %arg1 to <type>*
6168   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6169   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
6170     // Pull out the pointer to the variable.
6171     Address PtrAddr = CGF.EmitLoadOfPointer(
6172         CGF.GetAddrOfLocalVar(&ParamInOut),
6173         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6174     return CGF.Builder.CreateElementBitCast(
6175         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
6176   });
6177   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
6178     // Pull out the pointer to the variable.
6179     Address PtrAddr = CGF.EmitLoadOfPointer(
6180         CGF.GetAddrOfLocalVar(&ParamIn),
6181         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6182     return CGF.Builder.CreateElementBitCast(
6183         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
6184   });
6185   PrivateScope.Privatize();
6186   // Emit the combiner body:
6187   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6188   // store <type> %2, <type>* %lhs
6189   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6190       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6191       cast<DeclRefExpr>(RHS));
6192   CGF.FinishFunction();
6193   return Fn;
6194 }
6195 
6196 /// Emits reduction finalizer function:
6197 /// \code
6198 /// void @.red_fini(void* %arg) {
6199 /// %0 = bitcast void* %arg to <type>*
6200 /// <destroy>(<type>* %0)
6201 /// ret void
6202 /// }
6203 /// \endcode
6204 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6205                                            SourceLocation Loc,
6206                                            ReductionCodeGen &RCG, unsigned N) {
6207   if (!RCG.needCleanups(N))
6208     return nullptr;
6209   ASTContext &C = CGM.getContext();
6210   FunctionArgList Args;
6211   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6212                           ImplicitParamDecl::Other);
6213   Args.emplace_back(&Param);
6214   const auto &FnInfo =
6215       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6216   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6217   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6218   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6219                                     Name, &CGM.getModule());
6220   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6221   Fn->setDoesNotRecurse();
6222   CodeGenFunction CGF(CGM);
6223   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6224   Address PrivateAddr = CGF.EmitLoadOfPointer(
6225       CGF.GetAddrOfLocalVar(&Param),
6226       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6227   llvm::Value *Size = nullptr;
6228   // If the size of the reduction item is non-constant, load it from global
6229   // threadprivate variable.
6230   if (RCG.getSizes(N).second) {
6231     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6232         CGF, CGM.getContext().getSizeType(),
6233         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6234     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6235                                 CGM.getContext().getSizeType(), Loc);
6236   }
6237   RCG.emitAggregateType(CGF, N, Size);
6238   // Emit the finalizer body:
6239   // <destroy>(<type>* %0)
6240   RCG.emitCleanups(CGF, N, PrivateAddr);
6241   CGF.FinishFunction();
6242   return Fn;
6243 }
6244 
6245 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6246     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6247     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6248   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6249     return nullptr;
6250 
6251   // Build typedef struct:
6252   // kmp_task_red_input {
6253   //   void *reduce_shar; // shared reduction item
6254   //   size_t reduce_size; // size of data item
6255   //   void *reduce_init; // data initialization routine
6256   //   void *reduce_fini; // data finalization routine
6257   //   void *reduce_comb; // data combiner routine
6258   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
6259   // } kmp_task_red_input_t;
6260   ASTContext &C = CGM.getContext();
6261   RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
6262   RD->startDefinition();
6263   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6264   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6265   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6266   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6267   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6268   const FieldDecl *FlagsFD = addFieldToRecordDecl(
6269       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6270   RD->completeDefinition();
6271   QualType RDType = C.getRecordType(RD);
6272   unsigned Size = Data.ReductionVars.size();
6273   llvm::APInt ArraySize(/*numBits=*/64, Size);
6274   QualType ArrayRDType = C.getConstantArrayType(
6275       RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
6276   // kmp_task_red_input_t .rd_input.[Size];
6277   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6278   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
6279                        Data.ReductionOps);
6280   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6281     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6282     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6283                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6284     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6285         TaskRedInput.getPointer(), Idxs,
6286         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6287         ".rd_input.gep.");
6288     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6289     // ElemLVal.reduce_shar = &Shareds[Cnt];
6290     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6291     RCG.emitSharedLValue(CGF, Cnt);
6292     llvm::Value *CastedShared =
6293         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
6294     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6295     RCG.emitAggregateType(CGF, Cnt);
6296     llvm::Value *SizeValInChars;
6297     llvm::Value *SizeVal;
6298     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6299     // We use delayed creation/initialization for VLAs, array sections and
6300     // custom reduction initializations. It is required because runtime does not
6301     // provide the way to pass the sizes of VLAs/array sections to
6302     // initializer/combiner/finalizer functions and does not pass the pointer to
6303     // original reduction item to the initializer. Instead threadprivate global
6304     // variables are used to store these values and use them in the functions.
6305     bool DelayedCreation = !!SizeVal;
6306     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6307                                                /*isSigned=*/false);
6308     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6309     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6310     // ElemLVal.reduce_init = init;
6311     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6312     llvm::Value *InitAddr =
6313         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6314     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6315     DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
6316     // ElemLVal.reduce_fini = fini;
6317     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6318     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6319     llvm::Value *FiniAddr = Fini
6320                                 ? CGF.EmitCastToVoidPtr(Fini)
6321                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6322     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6323     // ElemLVal.reduce_comb = comb;
6324     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6325     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6326         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6327         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6328     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6329     // ElemLVal.flags = 0;
6330     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6331     if (DelayedCreation) {
6332       CGF.EmitStoreOfScalar(
6333           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6334           FlagsLVal);
6335     } else
6336       CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
6337   }
6338   // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
6339   // *data);
6340   llvm::Value *Args[] = {
6341       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6342                                 /*isSigned=*/true),
6343       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6344       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6345                                                       CGM.VoidPtrTy)};
6346   return CGF.EmitRuntimeCall(
6347       createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
6348 }
6349 
6350 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6351                                               SourceLocation Loc,
6352                                               ReductionCodeGen &RCG,
6353                                               unsigned N) {
6354   auto Sizes = RCG.getSizes(N);
6355   // Emit threadprivate global variable if the type is non-constant
6356   // (Sizes.second = nullptr).
6357   if (Sizes.second) {
6358     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6359                                                      /*isSigned=*/false);
6360     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6361         CGF, CGM.getContext().getSizeType(),
6362         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6363     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6364   }
6365   // Store address of the original reduction item if custom initializer is used.
6366   if (RCG.usesReductionInitializer(N)) {
6367     Address SharedAddr = getAddrOfArtificialThreadPrivate(
6368         CGF, CGM.getContext().VoidPtrTy,
6369         generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6370     CGF.Builder.CreateStore(
6371         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6372             RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
6373         SharedAddr, /*IsVolatile=*/false);
6374   }
6375 }
6376 
6377 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6378                                               SourceLocation Loc,
6379                                               llvm::Value *ReductionsPtr,
6380                                               LValue SharedLVal) {
6381   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6382   // *d);
6383   llvm::Value *Args[] = {
6384       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6385                                 /*isSigned=*/true),
6386       ReductionsPtr,
6387       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
6388                                                       CGM.VoidPtrTy)};
6389   return Address(
6390       CGF.EmitRuntimeCall(
6391           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6392       SharedLVal.getAlignment());
6393 }
6394 
6395 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6396                                        SourceLocation Loc) {
6397   if (!CGF.HaveInsertPoint())
6398     return;
6399   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6400   // global_tid);
6401   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6402   // Ignore return result until untied tasks are supported.
6403   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6404   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6405     Region->emitUntiedSwitch(CGF);
6406 }
6407 
6408 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6409                                            OpenMPDirectiveKind InnerKind,
6410                                            const RegionCodeGenTy &CodeGen,
6411                                            bool HasCancel) {
6412   if (!CGF.HaveInsertPoint())
6413     return;
6414   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6415   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6416 }
6417 
namespace {
/// Values passed as the 'cncl_kind' argument of the __kmpc_cancel and
/// __kmpc_cancellationpoint runtime calls emitted by this file.
enum RTCancelKind {
  /// Initial value; getCancellationKind() always replaces it.
  CancelNoreq = 0,
  /// Used for OMPD_parallel.
  CancelParallel = 1,
  /// Used for OMPD_for.
  CancelLoop = 2,
  /// Used for OMPD_sections.
  CancelSections = 3,
  /// Used for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // namespace
6427 
6428 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6429   RTCancelKind CancelKind = CancelNoreq;
6430   if (CancelRegion == OMPD_parallel)
6431     CancelKind = CancelParallel;
6432   else if (CancelRegion == OMPD_for)
6433     CancelKind = CancelLoop;
6434   else if (CancelRegion == OMPD_sections)
6435     CancelKind = CancelSections;
6436   else {
6437     assert(CancelRegion == OMPD_taskgroup);
6438     CancelKind = CancelTaskgroup;
6439   }
6440   return CancelKind;
6441 }
6442 
6443 void CGOpenMPRuntime::emitCancellationPointCall(
6444     CodeGenFunction &CGF, SourceLocation Loc,
6445     OpenMPDirectiveKind CancelRegion) {
6446   if (!CGF.HaveInsertPoint())
6447     return;
6448   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6449   // global_tid, kmp_int32 cncl_kind);
6450   if (auto *OMPRegionInfo =
6451           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6452     // For 'cancellation point taskgroup', the task region info may not have a
6453     // cancel. This may instead happen in another adjacent task.
6454     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6455       llvm::Value *Args[] = {
6456           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6457           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6458       // Ignore return result until untied tasks are supported.
6459       llvm::Value *Result = CGF.EmitRuntimeCall(
6460           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
6461       // if (__kmpc_cancellationpoint()) {
6462       //   exit from construct;
6463       // }
6464       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6465       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6466       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6467       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6468       CGF.EmitBlock(ExitBB);
6469       // exit from construct;
6470       CodeGenFunction::JumpDest CancelDest =
6471           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6472       CGF.EmitBranchThroughCleanup(CancelDest);
6473       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6474     }
6475   }
6476 }
6477 
6478 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6479                                      const Expr *IfCond,
6480                                      OpenMPDirectiveKind CancelRegion) {
6481   if (!CGF.HaveInsertPoint())
6482     return;
6483   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6484   // kmp_int32 cncl_kind);
6485   if (auto *OMPRegionInfo =
6486           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6487     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
6488                                                         PrePostActionTy &) {
6489       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6490       llvm::Value *Args[] = {
6491           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6492           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6493       // Ignore return result until untied tasks are supported.
6494       llvm::Value *Result = CGF.EmitRuntimeCall(
6495           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
6496       // if (__kmpc_cancel()) {
6497       //   exit from construct;
6498       // }
6499       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6500       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6501       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6502       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6503       CGF.EmitBlock(ExitBB);
6504       // exit from construct;
6505       CodeGenFunction::JumpDest CancelDest =
6506           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6507       CGF.EmitBranchThroughCleanup(CancelDest);
6508       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6509     };
6510     if (IfCond) {
6511       emitOMPIfClause(CGF, IfCond, ThenGen,
6512                       [](CodeGenFunction &, PrePostActionTy &) {});
6513     } else {
6514       RegionCodeGenTy ThenRCG(ThenGen);
6515       ThenRCG(CGF);
6516     }
6517   }
6518 }
6519 
6520 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6521     const OMPExecutableDirective &D, StringRef ParentName,
6522     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6523     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6524   assert(!ParentName.empty() && "Invalid target region parent name!");
6525   HasEmittedTargetRegion = true;
6526   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6527                                    IsOffloadEntry, CodeGen);
6528 }
6529 
6530 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6531     const OMPExecutableDirective &D, StringRef ParentName,
6532     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6533     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6534   // Create a unique name for the entry function using the source location
6535   // information of the current target region. The name will be something like:
6536   //
6537   // __omp_offloading_DD_FFFF_PP_lBB
6538   //
6539   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6540   // mangled name of the function that encloses the target region and BB is the
6541   // line number of the target region.
6542 
6543   unsigned DeviceID;
6544   unsigned FileID;
6545   unsigned Line;
6546   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6547                            Line);
6548   SmallString<64> EntryFnName;
6549   {
6550     llvm::raw_svector_ostream OS(EntryFnName);
6551     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6552        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6553   }
6554 
6555   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6556 
6557   CodeGenFunction CGF(CGM, true);
6558   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6559   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6560 
6561   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
6562 
6563   // If this target outline function is not an offload entry, we don't need to
6564   // register it.
6565   if (!IsOffloadEntry)
6566     return;
6567 
6568   // The target region ID is used by the runtime library to identify the current
6569   // target region, so it only has to be unique and not necessarily point to
6570   // anything. It could be the pointer to the outlined function that implements
6571   // the target region, but we aren't using that so that the compiler doesn't
6572   // need to keep that, and could therefore inline the host function if proven
6573   // worthwhile during optimization. In the other hand, if emitting code for the
6574   // device, the ID has to be the function address so that it can retrieved from
6575   // the offloading entry and launched by the runtime library. We also mark the
6576   // outlined function to have external linkage in case we are emitting code for
6577   // the device, because these functions will be entry points to the device.
6578 
6579   if (CGM.getLangOpts().OpenMPIsDevice) {
6580     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6581     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6582     OutlinedFn->setDSOLocal(false);
6583   } else {
6584     std::string Name = getName({EntryFnName, "region_id"});
6585     OutlinedFnID = new llvm::GlobalVariable(
6586         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6587         llvm::GlobalValue::WeakAnyLinkage,
6588         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6589   }
6590 
6591   // Register the information for the entry associated with this target region.
6592   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6593       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6594       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6595 }
6596 
6597 /// Checks if the expression is constant or does not have non-trivial function
6598 /// calls.
6599 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6600   // We can skip constant expressions.
6601   // We can skip expressions with trivial calls or simple expressions.
6602   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6603           !E->hasNonTrivialCall(Ctx)) &&
6604          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6605 }
6606 
6607 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6608                                                     const Stmt *Body) {
6609   const Stmt *Child = Body->IgnoreContainers();
6610   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6611     Child = nullptr;
6612     for (const Stmt *S : C->body()) {
6613       if (const auto *E = dyn_cast<Expr>(S)) {
6614         if (isTrivial(Ctx, E))
6615           continue;
6616       }
6617       // Some of the statements can be ignored.
6618       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6619           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6620         continue;
6621       // Analyze declarations.
6622       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6623         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6624               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6625                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6626                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6627                   isa<UsingDirectiveDecl>(D) ||
6628                   isa<OMPDeclareReductionDecl>(D) ||
6629                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6630                 return true;
6631               const auto *VD = dyn_cast<VarDecl>(D);
6632               if (!VD)
6633                 return false;
6634               return VD->isConstexpr() ||
6635                      ((VD->getType().isTrivialType(Ctx) ||
6636                        VD->getType()->isReferenceType()) &&
6637                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6638             }))
6639           continue;
6640       }
6641       // Found multiple children - cannot get the one child only.
6642       if (Child)
6643         return nullptr;
6644       Child = S;
6645     }
6646     if (Child)
6647       Child = Child->IgnoreContainers();
6648   }
6649   return Child;
6650 }
6651 
6652 /// Emit the number of teams for a target directive.  Inspect the num_teams
6653 /// clause associated with a teams construct combined or closely nested
6654 /// with the target directive.
6655 ///
6656 /// Emit a team of size one for directives such as 'target parallel' that
6657 /// have no associated teams construct.
6658 ///
6659 /// Otherwise, return nullptr.
6660 static llvm::Value *
6661 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6662                                const OMPExecutableDirective &D) {
6663   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6664          "Clauses associated with the teams directive expected to be emitted "
6665          "only for the host!");
6666   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6667   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6668          "Expected target-based executable directive.");
6669   CGBuilderTy &Bld = CGF.Builder;
6670   switch (DirectiveKind) {
6671   case OMPD_target: {
6672     const auto *CS = D.getInnermostCapturedStmt();
6673     const auto *Body =
6674         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6675     const Stmt *ChildStmt =
6676         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6677     if (const auto *NestedDir =
6678             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6679       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6680         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6681           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6682           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6683           const Expr *NumTeams =
6684               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6685           llvm::Value *NumTeamsVal =
6686               CGF.EmitScalarExpr(NumTeams,
6687                                  /*IgnoreResultAssign*/ true);
6688           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6689                                    /*isSigned=*/true);
6690         }
6691         return Bld.getInt32(0);
6692       }
6693       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6694           isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6695         return Bld.getInt32(1);
6696       return Bld.getInt32(0);
6697     }
6698     return nullptr;
6699   }
6700   case OMPD_target_teams:
6701   case OMPD_target_teams_distribute:
6702   case OMPD_target_teams_distribute_simd:
6703   case OMPD_target_teams_distribute_parallel_for:
6704   case OMPD_target_teams_distribute_parallel_for_simd: {
6705     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6706       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6707       const Expr *NumTeams =
6708           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6709       llvm::Value *NumTeamsVal =
6710           CGF.EmitScalarExpr(NumTeams,
6711                              /*IgnoreResultAssign*/ true);
6712       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6713                                /*isSigned=*/true);
6714     }
6715     return Bld.getInt32(0);
6716   }
6717   case OMPD_target_parallel:
6718   case OMPD_target_parallel_for:
6719   case OMPD_target_parallel_for_simd:
6720   case OMPD_target_simd:
6721     return Bld.getInt32(1);
6722   case OMPD_parallel:
6723   case OMPD_for:
6724   case OMPD_parallel_for:
6725   case OMPD_parallel_sections:
6726   case OMPD_for_simd:
6727   case OMPD_parallel_for_simd:
6728   case OMPD_cancel:
6729   case OMPD_cancellation_point:
6730   case OMPD_ordered:
6731   case OMPD_threadprivate:
6732   case OMPD_allocate:
6733   case OMPD_task:
6734   case OMPD_simd:
6735   case OMPD_sections:
6736   case OMPD_section:
6737   case OMPD_single:
6738   case OMPD_master:
6739   case OMPD_critical:
6740   case OMPD_taskyield:
6741   case OMPD_barrier:
6742   case OMPD_taskwait:
6743   case OMPD_taskgroup:
6744   case OMPD_atomic:
6745   case OMPD_flush:
6746   case OMPD_teams:
6747   case OMPD_target_data:
6748   case OMPD_target_exit_data:
6749   case OMPD_target_enter_data:
6750   case OMPD_distribute:
6751   case OMPD_distribute_simd:
6752   case OMPD_distribute_parallel_for:
6753   case OMPD_distribute_parallel_for_simd:
6754   case OMPD_teams_distribute:
6755   case OMPD_teams_distribute_simd:
6756   case OMPD_teams_distribute_parallel_for:
6757   case OMPD_teams_distribute_parallel_for_simd:
6758   case OMPD_target_update:
6759   case OMPD_declare_simd:
6760   case OMPD_declare_target:
6761   case OMPD_end_declare_target:
6762   case OMPD_declare_reduction:
6763   case OMPD_declare_mapper:
6764   case OMPD_taskloop:
6765   case OMPD_taskloop_simd:
6766   case OMPD_requires:
6767   case OMPD_unknown:
6768     break;
6769   }
6770   llvm_unreachable("Unexpected directive kind.");
6771 }
6772 
/// Compute the number of threads requested by the directive nested in \p CS.
///
/// The body of \p CS is reduced to its single relevant child with
/// getSingleCompoundChild() and the result depends on what that child is:
///  - a parallel directive: the value of its 'num_threads' clause (replaced by
///    \p DefaultThreadLimitVal when that is non-null and smaller in an
///    unsigned comparison), or \p DefaultThreadLimitVal, or 0 if neither is
///    available. If an applicable 'if' clause is present, the result is 1
///    whenever its condition is false;
///  - a simd directive that is not a parallel directive: the constant 1;
///  - any other executable directive: \p DefaultThreadLimitVal unchanged,
///    which may be null;
///  - anything else (including no single child): \p DefaultThreadLimitVal if
///    it is non-null, the constant 0 otherwise.
///
/// \param CGF Code generation state used to emit the clause expressions.
/// \param CS Captured statement whose body is inspected.
/// \param DefaultThreadLimitVal Thread limit already known to the caller, or
/// null if there is none. It is compared against the 32-bit 'num_threads'
/// value below.
/// \returns The emitted value, or null in the case described above.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      // Run-time value of the 'if' condition; stays null when there is no
      // applicable 'if' clause or when the condition folds to a constant.
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Use the first 'if' clause that either has no name modifier or has
        // the 'parallel' modifier.
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Constant condition: a false condition means a single thread; a
            // true condition needs no further handling.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit the declarations of the clause pre-init statement before
            // the condition. Declarations marked with OMPCaptureNoInitAttr
            // only get their storage and cleanups emitted.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Same pre-init handling as for the 'if' clause above.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // NumThreads = min(DefaultThreadLimitVal, NumThreads), unsigned.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No 'num_threads' clause: fall back to the thread limit, or to 0 if
        // there is none.
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A simd directive without parallelism uses a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    // Any other executable directive: hand back the limit as is. Note that
    // this is null when the caller did not provide one.
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6864 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// When both a thread limit and a number of threads are available, the
/// smaller one (unsigned comparison) is used. An applicable 'if' clause whose
/// condition is false forces the result to 1, and simd-only directives yield
/// the constant 1. The constant 0 is emitted when no clause provides a value.
///
/// Otherwise, return nullptr. In the code below this can only come from
/// getNumThreads() in the 'target teams distribute' case.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    // First try the directive nested directly in the target region.
    // ThreadLimitVal is still null here, so a null result means that the
    // nested directive is neither a parallel nor a simd directive.
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Pick up the 'thread_limit' clause of the nested directive, emitting
      // its pre-init declarations first.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // A teams directive that is not combined with distribute: step into
      // its region and continue with the directive nested there (Dir becomes
      // null if there is no such single directive).
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A distribute directive without simd: look for a parallel directive
      // nested in its innermost region.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // The 'thread_limit' clause belongs to the combined directive itself.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    // Otherwise look one level deeper, but only through a plain 'distribute'
    // directive.
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    // The result of getNumThreads() is returned as is, so this may be null.
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // All clauses of interest are attached to the combined directive itself.
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Use the first 'if' clause that either has no name modifier or has the
      // 'parallel' modifier.
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant false condition: a single thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // ThreadLimitVal = min(NumThreadsVal, ThreadLimitVal), unsigned, or just
      // NumThreadsVal if there is no thread limit.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    // Neither clause was specified.
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // None of the remaining kinds is a target execution directive.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7077 
7078 namespace {
7079 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7080 
7081 // Utility to handle information from clauses associated with a given
7082 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7083 // It provides a convenient interface to obtain the information and generate
7084 // code for that information.
7085 class MappableExprsHandler {
7086 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. The flags are combined with bitwise operators (the enum is
  /// marked as a bitmask enum below).
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// The 16 MSBs of the flags indicate whether the entry is a member of some
    /// struct/class. This is a multi-bit field rather than a single flag; its
    /// bit offset is computed by getFlagMemberOffset().
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7124 
7125   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7126   static unsigned getFlagMemberOffset() {
7127     unsigned Offset = 0;
7128     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7129          Remain = Remain >> 1)
7130       Offset++;
7131     return Offset;
7132   }
7133 
7134   /// Class that associates information with a base pointer to be passed to the
7135   /// runtime library.
7136   class BasePointerInfo {
7137     /// The base pointer.
7138     llvm::Value *Ptr = nullptr;
7139     /// The base declaration that refers to this device pointer, or null if
7140     /// there is none.
7141     const ValueDecl *DevPtrDecl = nullptr;
7142 
7143   public:
7144     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7145         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7146     llvm::Value *operator*() const { return Ptr; }
7147     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7148     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7149   };
7150 
  /// List of base pointers, each with its optional device pointer declaration.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// List of plain LLVM values (used for section pointers and for sizes).
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// List of mapping flag combinations.
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7154 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Lowest mapped element (LE); defaults to field index 0 with an invalid
    /// address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element (HE); defaults to field index 0 with an invalid
    /// address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address; invalid by default. NOTE(review): presumably the address
    /// of the enclosing struct - confirm against the code that fills it in.
    Address Base = Address::invalid();
  };
7166 
7167 private:
  /// Information recorded for one list of mappable expression components: the
  /// components themselves, the map type and modifiers to apply, and two
  /// flags.
  struct MapInfo {
    /// The expression components this entry describes.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type to use for the components; unknown by default.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map modifiers to use for the components. This is a non-owning
    /// ArrayRef, so the referenced storage has to outlive this object.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// NOTE(review): presumably requests that the runtime returns the device
    /// pointer for this entry (cf. OMP_MAP_RETURN_PARAM) - confirm against the
    /// users of this field.
    bool ReturnDevicePointer = false;
    /// NOTE(review): presumably true if the map was not written explicitly by
    /// the user (cf. OMP_MAP_IMPLICIT) - confirm against the users of this
    /// field.
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7185 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed. This struct records one such
  /// deferred entry.
  struct DeferredDevicePtrEntryTy {
    /// Expression of the deferred entry. NOTE(review): presumably the
    /// expression naming the struct member - confirm against the code that
    /// creates these entries.
    const Expr *IE = nullptr;
    /// Declaration of the deferred entry. NOTE(review): presumably the
    /// declaration of the device pointer - confirm against the code that
    /// creates these entries.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7196 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
7199   llvm::PointerUnion<const OMPExecutableDirective *,
7200                      const OMPDeclareMapperDecl *>
7201       CurDir;
7202 
7203   /// Function the directive is being generated for.
7204   CodeGenFunction &CGF;
7205 
7206   /// Set of all first private variables in the current directive.
7207   /// bool data is set to true if the variable is implicitly marked as
7208   /// firstprivate, false otherwise.
7209   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7210 
7211   /// Map between device pointer declarations and their expression components.
7212   /// The key value for declarations in 'this' is null.
7213   llvm::DenseMap<
7214       const ValueDecl *,
7215       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7216       DevPointersMap;
7217 
7218   llvm::Value *getExprTypeSize(const Expr *E) const {
7219     QualType ExprTy = E->getType().getCanonicalType();
7220 
7221     // Reference types are ignored for mapping purposes.
7222     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7223       ExprTy = RefTy->getPointeeType().getCanonicalType();
7224 
7225     // Given that an array section is considered a built-in type, we need to
7226     // do the calculation based on the length of the section instead of relying
7227     // on CGF.getTypeSize(E->getType()).
7228     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7229       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7230                             OAE->getBase()->IgnoreParenImpCasts())
7231                             .getCanonicalType();
7232 
7233       // If there is no length associated with the expression, that means we
7234       // are using the whole length of the base.
7235       if (!OAE->getLength() && OAE->getColonLoc().isValid())
7236         return CGF.getTypeSize(BaseTy);
7237 
7238       llvm::Value *ElemSize;
7239       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7240         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7241       } else {
7242         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7243         assert(ATy && "Expecting array type if not a pointer type.");
7244         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7245       }
7246 
7247       // If we don't have a length at this point, that is because we have an
7248       // array section with a single element.
7249       if (!OAE->getLength())
7250         return ElemSize;
7251 
7252       llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
7253       LengthVal =
7254           CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
7255       return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7256     }
7257     return CGF.getTypeSize(ExprTy);
7258   }
7259 
7260   /// Return the corresponding bits for a given map clause modifier. Add
7261   /// a flag marking the map as a pointer if requested. Add a flag marking the
7262   /// map as the first one of a series of maps that relate to the same map
7263   /// expression.
7264   OpenMPOffloadMappingFlags getMapTypeBits(
7265       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7266       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7267     OpenMPOffloadMappingFlags Bits =
7268         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7269     switch (MapType) {
7270     case OMPC_MAP_alloc:
7271     case OMPC_MAP_release:
7272       // alloc and release is the default behavior in the runtime library,  i.e.
7273       // if we don't pass any bits alloc/release that is what the runtime is
7274       // going to do. Therefore, we don't need to signal anything for these two
7275       // type modifiers.
7276       break;
7277     case OMPC_MAP_to:
7278       Bits |= OMP_MAP_TO;
7279       break;
7280     case OMPC_MAP_from:
7281       Bits |= OMP_MAP_FROM;
7282       break;
7283     case OMPC_MAP_tofrom:
7284       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7285       break;
7286     case OMPC_MAP_delete:
7287       Bits |= OMP_MAP_DELETE;
7288       break;
7289     case OMPC_MAP_unknown:
7290       llvm_unreachable("Unexpected map type!");
7291     }
7292     if (AddPtrFlag)
7293       Bits |= OMP_MAP_PTR_AND_OBJ;
7294     if (AddIsTargetParamFlag)
7295       Bits |= OMP_MAP_TARGET_PARAM;
7296     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7297         != MapModifiers.end())
7298       Bits |= OMP_MAP_ALWAYS;
7299     return Bits;
7300   }
7301 
7302   /// Return true if the provided expression is a final array section. A
7303   /// final array section, is one whose length can't be proved to be one.
7304   bool isFinalArraySectionExpression(const Expr *E) const {
7305     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7306 
7307     // It is not an array section and therefore not a unity-size one.
7308     if (!OASE)
7309       return false;
7310 
7311     // An array section with no colon always refer to a single element.
7312     if (OASE->getColonLoc().isInvalid())
7313       return false;
7314 
7315     const Expr *Length = OASE->getLength();
7316 
7317     // If we don't have a length we have to check if the array has size 1
7318     // for this dimension. Also, we should always expect a length if the
7319     // base type is pointer.
7320     if (!Length) {
7321       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7322                              OASE->getBase()->IgnoreParenImpCasts())
7323                              .getCanonicalType();
7324       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7325         return ATy->getSize().getSExtValue() != 1;
7326       // If we don't have a constant dimension length, we have to consider
7327       // the current section as having any size, so it is not necessarily
7328       // unitary. If it happen to be unity size, that's user fault.
7329       return true;
7330     }
7331 
7332     // Check if the length evaluates to 1.
7333     Expr::EvalResult Result;
7334     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7335       return true; // Can have more that size 1.
7336 
7337     llvm::APSInt ConstLength = Result.Val.getInt();
7338     return ConstLength.getSExtValue() != 1;
7339   }
7340 
7341   /// Generate the base pointers, section pointers, sizes and map type
7342   /// bits for the provided map type, map modifier, and expression components.
7343   /// \a IsFirstComponent should be set to true if the provided set of
7344   /// components is the first associated with a capture.
7345   void generateInfoForComponentList(
7346       OpenMPMapClauseKind MapType,
7347       ArrayRef<OpenMPMapModifierKind> MapModifiers,
7348       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7349       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7350       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7351       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7352       bool IsImplicit,
7353       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7354           OverlappedElements = llvm::None) const {
7355     // The following summarizes what has to be generated for each map and the
7356     // types below. The generated information is expressed in this order:
7357     // base pointer, section pointer, size, flags
7358     // (to add to the ones that come from the map type and modifier).
7359     //
7360     // double d;
7361     // int i[100];
7362     // float *p;
7363     //
7364     // struct S1 {
7365     //   int i;
7366     //   float f[50];
7367     // }
7368     // struct S2 {
7369     //   int i;
7370     //   float f[50];
7371     //   S1 s;
7372     //   double *p;
7373     //   struct S2 *ps;
7374     // }
7375     // S2 s;
7376     // S2 *ps;
7377     //
7378     // map(d)
7379     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7380     //
7381     // map(i)
7382     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7383     //
7384     // map(i[1:23])
7385     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7386     //
7387     // map(p)
7388     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7389     //
7390     // map(p[1:24])
7391     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7392     //
7393     // map(s)
7394     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7395     //
7396     // map(s.i)
7397     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7398     //
7399     // map(s.s.f)
7400     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7401     //
7402     // map(s.p)
7403     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7404     //
7405     // map(to: s.p[:22])
7406     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7407     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7408     // &(s.p), &(s.p[0]), 22*sizeof(double),
7409     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7410     // (*) alloc space for struct members, only this is a target parameter
7411     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7412     //      optimizes this entry out, same in the examples below)
7413     // (***) map the pointee (map: to)
7414     //
7415     // map(s.ps)
7416     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7417     //
7418     // map(from: s.ps->s.i)
7419     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7420     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7421     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7422     //
7423     // map(to: s.ps->ps)
7424     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7425     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7426     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7427     //
7428     // map(s.ps->ps->ps)
7429     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7430     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7431     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7432     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7433     //
7434     // map(to: s.ps->ps->s.f[:22])
7435     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7436     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7437     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7438     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7439     //
7440     // map(ps)
7441     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7442     //
7443     // map(ps->i)
7444     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7445     //
7446     // map(ps->s.f)
7447     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7448     //
7449     // map(from: ps->p)
7450     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7451     //
7452     // map(to: ps->p[:22])
7453     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7454     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7455     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7456     //
7457     // map(ps->ps)
7458     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7459     //
7460     // map(from: ps->ps->s.i)
7461     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7462     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7463     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7464     //
7465     // map(from: ps->ps->ps)
7466     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7467     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7468     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7469     //
7470     // map(ps->ps->ps->ps)
7471     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7472     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7473     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7474     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7475     //
7476     // map(to: ps->ps->ps->s.f[:22])
7477     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7478     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7479     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7480     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7481     //
7482     // map(to: s.f[:22]) map(from: s.p[:33])
7483     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7484     //     sizeof(double*) (**), TARGET_PARAM
7485     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7486     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7487     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7488     // (*) allocate contiguous space needed to fit all mapped members even if
7489     //     we allocate space for members not mapped (in this example,
7490     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7491     //     them as well because they fall between &s.f[0] and &s.p)
7492     //
7493     // map(from: s.f[:22]) map(to: ps->p[:33])
7494     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7495     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7496     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7497     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7498     // (*) the struct this entry pertains to is the 2nd element in the list of
7499     //     arguments, hence MEMBER_OF(2)
7500     //
7501     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7502     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7503     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7504     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7505     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7506     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7507     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7508     // (*) the struct this entry pertains to is the 4th element in the list
7509     //     of arguments, hence MEMBER_OF(4)
7510 
7511     // Track if the map information being generated is the first for a capture.
7512     bool IsCaptureFirstInfo = IsFirstComponentList;
7513     // When the variable is on a declare target link or in a to clause with
7514     // unified memory, a reference is needed to hold the host/device address
7515     // of the variable.
7516     bool RequiresReference = false;
7517 
7518     // Scan the components from the base to the complete expression.
7519     auto CI = Components.rbegin();
7520     auto CE = Components.rend();
7521     auto I = CI;
7522 
7523     // Track if the map information being generated is the first for a list of
7524     // components.
7525     bool IsExpressionFirstInfo = true;
7526     Address BP = Address::invalid();
7527     const Expr *AssocExpr = I->getAssociatedExpression();
7528     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7529     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7530 
7531     if (isa<MemberExpr>(AssocExpr)) {
7532       // The base is the 'this' pointer. The content of the pointer is going
7533       // to be the base of the field being mapped.
7534       BP = CGF.LoadCXXThisAddress();
7535     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7536                (OASE &&
7537                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7538       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7539     } else {
7540       // The base is the reference to the variable.
7541       // BP = &Var.
7542       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7543       if (const auto *VD =
7544               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7545         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7546                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7547           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7548               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7549                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7550             RequiresReference = true;
7551             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7552           }
7553         }
7554       }
7555 
7556       // If the variable is a pointer and is being dereferenced (i.e. is not
7557       // the last component), the base has to be the pointer itself, not its
7558       // reference. References are ignored for mapping purposes.
7559       QualType Ty =
7560           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7561       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7562         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7563 
7564         // We do not need to generate individual map information for the
7565         // pointer, it can be associated with the combined storage.
7566         ++I;
7567       }
7568     }
7569 
7570     // Track whether a component of the list should be marked as MEMBER_OF some
7571     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7572     // in a component list should be marked as MEMBER_OF, all subsequent entries
7573     // do not belong to the base struct. E.g.
7574     // struct S2 s;
7575     // s.ps->ps->ps->f[:]
7576     //   (1) (2) (3) (4)
7577     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7578     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7579     // is the pointee of ps(2) which is not member of struct s, so it should not
7580     // be marked as such (it is still PTR_AND_OBJ).
7581     // The variable is initialized to false so that PTR_AND_OBJ entries which
7582     // are not struct members are not considered (e.g. array of pointers to
7583     // data).
7584     bool ShouldBeMemberOf = false;
7585 
7586     // Variable keeping track of whether or not we have encountered a component
7587     // in the component list which is a member expression. Useful when we have a
7588     // pointer or a final array section, in which case it is the previous
7589     // component in the list which tells us whether we have a member expression.
7590     // E.g. X.f[:]
7591     // While processing the final array section "[:]" it is "f" which tells us
7592     // whether we are dealing with a member of a declared struct.
7593     const MemberExpr *EncounteredME = nullptr;
7594 
7595     for (; I != CE; ++I) {
7596       // If the current component is member of a struct (parent struct) mark it.
7597       if (!EncounteredME) {
7598         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7599         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7600         // as MEMBER_OF the parent struct.
7601         if (EncounteredME)
7602           ShouldBeMemberOf = true;
7603       }
7604 
7605       auto Next = std::next(I);
7606 
7607       // We need to generate the addresses and sizes if this is the last
7608       // component, if the component is a pointer or if it is an array section
7609       // whose length can't be proved to be one. If this is a pointer, it
7610       // becomes the base address for the following components.
7611 
7612       // A final array section, is one whose length can't be proved to be one.
7613       bool IsFinalArraySection =
7614           isFinalArraySectionExpression(I->getAssociatedExpression());
7615 
7616       // Get information on whether the element is a pointer. Have to do a
7617       // special treatment for array sections given that they are built-in
7618       // types.
7619       const auto *OASE =
7620           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7621       bool IsPointer =
7622           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7623                        .getCanonicalType()
7624                        ->isAnyPointerType()) ||
7625           I->getAssociatedExpression()->getType()->isAnyPointerType();
7626 
7627       if (Next == CE || IsPointer || IsFinalArraySection) {
7628         // If this is not the last component, we expect the pointer to be
7629         // associated with an array expression or member expression.
7630         assert((Next == CE ||
7631                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7632                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7633                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
7634                "Unexpected expression");
7635 
7636         Address LB =
7637             CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
7638 
7639         // If this component is a pointer inside the base struct then we don't
7640         // need to create any entry for it - it will be combined with the object
7641         // it is pointing to into a single PTR_AND_OBJ entry.
7642         bool IsMemberPointer =
7643             IsPointer && EncounteredME &&
7644             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7645              EncounteredME);
7646         if (!OverlappedElements.empty()) {
7647           // Handle base element with the info for overlapped elements.
7648           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7649           assert(Next == CE &&
7650                  "Expected last element for the overlapped elements.");
7651           assert(!IsPointer &&
7652                  "Unexpected base element with the pointer type.");
7653           // Mark the whole struct as the struct that requires allocation on the
7654           // device.
7655           PartialStruct.LowestElem = {0, LB};
7656           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7657               I->getAssociatedExpression()->getType());
7658           Address HB = CGF.Builder.CreateConstGEP(
7659               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7660                                                               CGF.VoidPtrTy),
7661               TypeSize.getQuantity() - 1);
7662           PartialStruct.HighestElem = {
7663               std::numeric_limits<decltype(
7664                   PartialStruct.HighestElem.first)>::max(),
7665               HB};
7666           PartialStruct.Base = BP;
7667           // Emit data for non-overlapped data.
7668           OpenMPOffloadMappingFlags Flags =
7669               OMP_MAP_MEMBER_OF |
7670               getMapTypeBits(MapType, MapModifiers, IsImplicit,
7671                              /*AddPtrFlag=*/false,
7672                              /*AddIsTargetParamFlag=*/false);
7673           LB = BP;
7674           llvm::Value *Size = nullptr;
7675           // Do bitcopy of all non-overlapped structure elements.
7676           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7677                    Component : OverlappedElements) {
7678             Address ComponentLB = Address::invalid();
7679             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7680                  Component) {
7681               if (MC.getAssociatedDeclaration()) {
7682                 ComponentLB =
7683                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7684                         .getAddress();
7685                 Size = CGF.Builder.CreatePtrDiff(
7686                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7687                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7688                 break;
7689               }
7690             }
7691             BasePointers.push_back(BP.getPointer());
7692             Pointers.push_back(LB.getPointer());
7693             Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
7694                                                       /*isSigned=*/true));
7695             Types.push_back(Flags);
7696             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7697           }
7698           BasePointers.push_back(BP.getPointer());
7699           Pointers.push_back(LB.getPointer());
7700           Size = CGF.Builder.CreatePtrDiff(
7701               CGF.EmitCastToVoidPtr(
7702                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7703               CGF.EmitCastToVoidPtr(LB.getPointer()));
7704           Sizes.push_back(
7705               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7706           Types.push_back(Flags);
7707           break;
7708         }
7709         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7710         if (!IsMemberPointer) {
7711           BasePointers.push_back(BP.getPointer());
7712           Pointers.push_back(LB.getPointer());
7713           Sizes.push_back(
7714               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7715 
7716           // We need to add a pointer flag for each map that comes from the
7717           // same expression except for the first one. We also need to signal
7718           // this map is the first one that relates with the current capture
7719           // (there is a set of entries for each capture).
7720           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7721               MapType, MapModifiers, IsImplicit,
7722               !IsExpressionFirstInfo || RequiresReference,
7723               IsCaptureFirstInfo && !RequiresReference);
7724 
7725           if (!IsExpressionFirstInfo) {
7726             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7727             // then we reset the TO/FROM/ALWAYS/DELETE flags.
7728             if (IsPointer)
7729               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7730                          OMP_MAP_DELETE);
7731 
7732             if (ShouldBeMemberOf) {
7733               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7734               // should be later updated with the correct value of MEMBER_OF.
7735               Flags |= OMP_MAP_MEMBER_OF;
7736               // From now on, all subsequent PTR_AND_OBJ entries should not be
7737               // marked as MEMBER_OF.
7738               ShouldBeMemberOf = false;
7739             }
7740           }
7741 
7742           Types.push_back(Flags);
7743         }
7744 
7745         // If we have encountered a member expression so far, keep track of the
7746         // mapped member. If the parent is "*this", then the value declaration
7747         // is nullptr.
7748         if (EncounteredME) {
7749           const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
7750           unsigned FieldIndex = FD->getFieldIndex();
7751 
7752           // Update info about the lowest and highest elements for this struct
7753           if (!PartialStruct.Base.isValid()) {
7754             PartialStruct.LowestElem = {FieldIndex, LB};
7755             PartialStruct.HighestElem = {FieldIndex, LB};
7756             PartialStruct.Base = BP;
7757           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7758             PartialStruct.LowestElem = {FieldIndex, LB};
7759           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7760             PartialStruct.HighestElem = {FieldIndex, LB};
7761           }
7762         }
7763 
7764         // If we have a final array section, we are done with this expression.
7765         if (IsFinalArraySection)
7766           break;
7767 
7768         // The pointer becomes the base for the next element.
7769         if (Next != CE)
7770           BP = LB;
7771 
7772         IsExpressionFirstInfo = false;
7773         IsCaptureFirstInfo = false;
7774       }
7775     }
7776   }
7777 
7778   /// Return the adjusted map modifiers if the declaration a capture refers to
7779   /// appears in a first-private clause. This is expected to be used only with
7780   /// directives that start with 'target'.
7781   MappableExprsHandler::OpenMPOffloadMappingFlags
7782   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7783     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7784 
7785     // A first private variable captured by reference will use only the
7786     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7787     // declaration is known as first-private in this handler.
7788     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7789       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7790           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7791         return MappableExprsHandler::OMP_MAP_ALWAYS |
7792                MappableExprsHandler::OMP_MAP_TO;
7793       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7794         return MappableExprsHandler::OMP_MAP_TO |
7795                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7796       return MappableExprsHandler::OMP_MAP_PRIVATE |
7797              MappableExprsHandler::OMP_MAP_TO;
7798     }
7799     return MappableExprsHandler::OMP_MAP_TO |
7800            MappableExprsHandler::OMP_MAP_FROM;
7801   }
7802 
7803   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7804     // Rotate by getFlagMemberOffset() bits.
7805     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7806                                                   << getFlagMemberOffset());
7807   }
7808 
7809   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7810                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7811     // If the entry is PTR_AND_OBJ but has not been marked with the special
7812     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7813     // marked as MEMBER_OF.
7814     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7815         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7816       return;
7817 
7818     // Reset the placeholder value to prepare the flag for the assignment of the
7819     // proper MEMBER_OF value.
7820     Flags &= ~OMP_MAP_MEMBER_OF;
7821     Flags |= MemberOfFlag;
7822   }
7823 
7824   void getPlainLayout(const CXXRecordDecl *RD,
7825                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7826                       bool AsBase) const {
7827     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7828 
7829     llvm::StructType *St =
7830         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7831 
7832     unsigned NumElements = St->getNumElements();
7833     llvm::SmallVector<
7834         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7835         RecordLayout(NumElements);
7836 
7837     // Fill bases.
7838     for (const auto &I : RD->bases()) {
7839       if (I.isVirtual())
7840         continue;
7841       const auto *Base = I.getType()->getAsCXXRecordDecl();
7842       // Ignore empty bases.
7843       if (Base->isEmpty() || CGF.getContext()
7844                                  .getASTRecordLayout(Base)
7845                                  .getNonVirtualSize()
7846                                  .isZero())
7847         continue;
7848 
7849       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7850       RecordLayout[FieldIndex] = Base;
7851     }
7852     // Fill in virtual bases.
7853     for (const auto &I : RD->vbases()) {
7854       const auto *Base = I.getType()->getAsCXXRecordDecl();
7855       // Ignore empty bases.
7856       if (Base->isEmpty())
7857         continue;
7858       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7859       if (RecordLayout[FieldIndex])
7860         continue;
7861       RecordLayout[FieldIndex] = Base;
7862     }
7863     // Fill in all the fields.
7864     assert(!RD->isUnion() && "Unexpected union.");
7865     for (const auto *Field : RD->fields()) {
7866       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7867       // will fill in later.)
7868       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7869         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7870         RecordLayout[FieldIndex] = Field;
7871       }
7872     }
7873     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7874              &Data : RecordLayout) {
7875       if (Data.isNull())
7876         continue;
7877       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7878         getPlainLayout(Base, Layout, /*AsBase=*/true);
7879       else
7880         Layout.push_back(Data.get<const FieldDecl *>());
7881     }
7882   }
7883 
7884 public:
7885   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7886       : CurDir(&Dir), CGF(CGF) {
7887     // Extract firstprivate clause information.
7888     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7889       for (const auto *D : C->varlists())
7890         FirstPrivateDecls.try_emplace(
7891             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7892     // Extract device pointer clause information.
7893     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7894       for (auto L : C->component_lists())
7895         DevPointersMap[L.first].push_back(L.second);
7896   }
7897 
7898   /// Constructor for the declare mapper directive.
7899   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
7900       : CurDir(&Dir), CGF(CGF) {}
7901 
7902   /// Generate code for the combined entry if we have a partially mapped struct
7903   /// and take care of the mapping flags of the arguments corresponding to
7904   /// individual struct members.
7905   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7906                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7907                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7908                          const StructRangeInfoTy &PartialStruct) const {
7909     // Base is the base of the struct
7910     BasePointers.push_back(PartialStruct.Base.getPointer());
7911     // Pointer is the address of the lowest element
7912     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7913     Pointers.push_back(LB);
7914     // Size is (addr of {highest+1} element) - (addr of lowest element)
7915     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7916     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7917     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7918     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7919     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7920     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7921                                                   /*isSigned=*/false);
7922     Sizes.push_back(Size);
7923     // Map type is always TARGET_PARAM
7924     Types.push_back(OMP_MAP_TARGET_PARAM);
7925     // Remove TARGET_PARAM flag from the first element
7926     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7927 
7928     // All other current entries will be MEMBER_OF the combined entry
7929     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7930     // 0xFFFF in the MEMBER_OF field).
7931     OpenMPOffloadMappingFlags MemberOfFlag =
7932         getMemberOfFlag(BasePointers.size() - 1);
7933     for (auto &M : CurTypes)
7934       setCorrectMemberOfFlag(M, MemberOfFlag);
7935   }
7936 
7937   /// Generate all the base pointers, section pointers, sizes and map
7938   /// types for the extracted mappable expressions. Also, for each item that
7939   /// relates with a device pointer, a pair of the relevant declaration and
7940   /// index where it occurs is appended to the device pointers info array.
7941   void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7942                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7943                        MapFlagsArrayTy &Types) const {
7944     // We have to process the component lists that relate with the same
7945     // declaration in a single chunk so that we can generate the map flags
7946     // correctly. Therefore, we organize all lists in a map.
7947     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7948 
7949     // Helper function to fill the information map for the different supported
7950     // clauses.
7951     auto &&InfoGen = [&Info](
7952         const ValueDecl *D,
7953         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7954         OpenMPMapClauseKind MapType,
7955         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7956         bool ReturnDevicePointer, bool IsImplicit) {
7957       const ValueDecl *VD =
7958           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7959       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7960                             IsImplicit);
7961     };
7962 
7963     assert(CurDir.is<const OMPExecutableDirective *>() &&
7964            "Expect a executable directive");
7965     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
7966     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
7967       for (const auto &L : C->component_lists()) {
7968         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7969             /*ReturnDevicePointer=*/false, C->isImplicit());
7970       }
7971     for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
7972       for (const auto &L : C->component_lists()) {
7973         InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7974             /*ReturnDevicePointer=*/false, C->isImplicit());
7975       }
7976     for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
7977       for (const auto &L : C->component_lists()) {
7978         InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7979             /*ReturnDevicePointer=*/false, C->isImplicit());
7980       }
7981 
7982     // Look at the use_device_ptr clause information and mark the existing map
7983     // entries as such. If there is no map information for an entry in the
7984     // use_device_ptr list, we create one with map type 'alloc' and zero size
7985     // section. It is the user fault if that was not mapped before. If there is
7986     // no map information and the pointer is a struct member, then we defer the
7987     // emission of that entry until the whole struct has been processed.
7988     llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7989         DeferredInfo;
7990 
7991     for (const auto *C :
7992          CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
7993       for (const auto &L : C->component_lists()) {
7994         assert(!L.second.empty() && "Not expecting empty list of components!");
7995         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
7996         VD = cast<ValueDecl>(VD->getCanonicalDecl());
7997         const Expr *IE = L.second.back().getAssociatedExpression();
7998         // If the first component is a member expression, we have to look into
7999         // 'this', which maps to null in the map of map information. Otherwise
8000         // look directly for the information.
8001         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8002 
8003         // We potentially have map information for this declaration already.
8004         // Look for the first set of components that refer to it.
8005         if (It != Info.end()) {
8006           auto CI = std::find_if(
8007               It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
8008                 return MI.Components.back().getAssociatedDeclaration() == VD;
8009               });
8010           // If we found a map entry, signal that the pointer has to be returned
8011           // and move on to the next declaration.
8012           if (CI != It->second.end()) {
8013             CI->ReturnDevicePointer = true;
8014             continue;
8015           }
8016         }
8017 
8018         // We didn't find any match in our map information - generate a zero
8019         // size array section - if the pointer is a struct member we defer this
8020         // action until the whole struct has been processed.
8021         if (isa<MemberExpr>(IE)) {
8022           // Insert the pointer into Info to be processed by
8023           // generateInfoForComponentList. Because it is a member pointer
8024           // without a pointee, no entry will be generated for it, therefore
8025           // we need to generate one after the whole struct has been processed.
8026           // Nonetheless, generateInfoForComponentList must be called to take
8027           // the pointer into account for the calculation of the range of the
8028           // partial struct.
8029           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8030                   /*ReturnDevicePointer=*/false, C->isImplicit());
8031           DeferredInfo[nullptr].emplace_back(IE, VD);
8032         } else {
8033           llvm::Value *Ptr =
8034               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8035           BasePointers.emplace_back(Ptr, VD);
8036           Pointers.push_back(Ptr);
8037           Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8038           Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8039         }
8040       }
8041     }
8042 
8043     for (const auto &M : Info) {
8044       // We need to know when we generate information for the first component
8045       // associated with a capture, because the mapping flags depend on it.
8046       bool IsFirstComponentList = true;
8047 
8048       // Temporary versions of arrays
8049       MapBaseValuesArrayTy CurBasePointers;
8050       MapValuesArrayTy CurPointers;
8051       MapValuesArrayTy CurSizes;
8052       MapFlagsArrayTy CurTypes;
8053       StructRangeInfoTy PartialStruct;
8054 
8055       for (const MapInfo &L : M.second) {
8056         assert(!L.Components.empty() &&
8057                "Not expecting declaration with no component lists.");
8058 
8059         // Remember the current base pointer index.
8060         unsigned CurrentBasePointersIdx = CurBasePointers.size();
8061         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8062                                      CurBasePointers, CurPointers, CurSizes,
8063                                      CurTypes, PartialStruct,
8064                                      IsFirstComponentList, L.IsImplicit);
8065 
8066         // If this entry relates with a device pointer, set the relevant
8067         // declaration and add the 'return pointer' flag.
8068         if (L.ReturnDevicePointer) {
8069           assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8070                  "Unexpected number of mapped base pointers.");
8071 
8072           const ValueDecl *RelevantVD =
8073               L.Components.back().getAssociatedDeclaration();
8074           assert(RelevantVD &&
8075                  "No relevant declaration related with device pointer??");
8076 
8077           CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8078           CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8079         }
8080         IsFirstComponentList = false;
8081       }
8082 
8083       // Append any pending zero-length pointers which are struct members and
8084       // used with use_device_ptr.
8085       auto CI = DeferredInfo.find(M.first);
8086       if (CI != DeferredInfo.end()) {
8087         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8088           llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
8089           llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
8090               this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
8091           CurBasePointers.emplace_back(BasePtr, L.VD);
8092           CurPointers.push_back(Ptr);
8093           CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8094           // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8095           // value MEMBER_OF=FFFF so that the entry is later updated with the
8096           // correct value of MEMBER_OF.
8097           CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8098                              OMP_MAP_MEMBER_OF);
8099         }
8100       }
8101 
8102       // If there is an entry in PartialStruct it means we have a struct with
8103       // individual members mapped. Emit an extra combined entry.
8104       if (PartialStruct.Base.isValid())
8105         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8106                           PartialStruct);
8107 
8108       // We need to append the results of this capture to what we already have.
8109       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8110       Pointers.append(CurPointers.begin(), CurPointers.end());
8111       Sizes.append(CurSizes.begin(), CurSizes.end());
8112       Types.append(CurTypes.begin(), CurTypes.end());
8113     }
8114   }
8115 
8116   /// Generate all the base pointers, section pointers, sizes and map types for
8117   /// the extracted map clauses of user-defined mapper.
8118   void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8119                                 MapValuesArrayTy &Pointers,
8120                                 MapValuesArrayTy &Sizes,
8121                                 MapFlagsArrayTy &Types) const {
8122     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8123            "Expect a declare mapper directive");
8124     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8125     // We have to process the component lists that relate with the same
8126     // declaration in a single chunk so that we can generate the map flags
8127     // correctly. Therefore, we organize all lists in a map.
8128     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8129 
8130     // Helper function to fill the information map for the different supported
8131     // clauses.
8132     auto &&InfoGen = [&Info](
8133         const ValueDecl *D,
8134         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8135         OpenMPMapClauseKind MapType,
8136         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8137         bool ReturnDevicePointer, bool IsImplicit) {
8138       const ValueDecl *VD =
8139           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8140       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8141                             IsImplicit);
8142     };
8143 
8144     for (const auto *C : CurMapperDir->clauselists()) {
8145       const auto *MC = cast<OMPMapClause>(C);
8146       for (const auto &L : MC->component_lists()) {
8147         InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8148                 /*ReturnDevicePointer=*/false, MC->isImplicit());
8149       }
8150     }
8151 
8152     for (const auto &M : Info) {
8153       // We need to know when we generate information for the first component
8154       // associated with a capture, because the mapping flags depend on it.
8155       bool IsFirstComponentList = true;
8156 
8157       // Temporary versions of arrays
8158       MapBaseValuesArrayTy CurBasePointers;
8159       MapValuesArrayTy CurPointers;
8160       MapValuesArrayTy CurSizes;
8161       MapFlagsArrayTy CurTypes;
8162       StructRangeInfoTy PartialStruct;
8163 
8164       for (const MapInfo &L : M.second) {
8165         assert(!L.Components.empty() &&
8166                "Not expecting declaration with no component lists.");
8167         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8168                                      CurBasePointers, CurPointers, CurSizes,
8169                                      CurTypes, PartialStruct,
8170                                      IsFirstComponentList, L.IsImplicit);
8171         IsFirstComponentList = false;
8172       }
8173 
8174       // If there is an entry in PartialStruct it means we have a struct with
8175       // individual members mapped. Emit an extra combined entry.
8176       if (PartialStruct.Base.isValid())
8177         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8178                           PartialStruct);
8179 
8180       // We need to append the results of this capture to what we already have.
8181       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8182       Pointers.append(CurPointers.begin(), CurPointers.end());
8183       Sizes.append(CurSizes.begin(), CurSizes.end());
8184       Types.append(CurTypes.begin(), CurTypes.end());
8185     }
8186   }
8187 
8188   /// Emit capture info for lambdas for variables captured by reference.
8189   void generateInfoForLambdaCaptures(
8190       const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8191       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8192       MapFlagsArrayTy &Types,
8193       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8194     const auto *RD = VD->getType()
8195                          .getCanonicalType()
8196                          .getNonReferenceType()
8197                          ->getAsCXXRecordDecl();
8198     if (!RD || !RD->isLambda())
8199       return;
8200     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8201     LValue VDLVal = CGF.MakeAddrLValue(
8202         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8203     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8204     FieldDecl *ThisCapture = nullptr;
8205     RD->getCaptureFields(Captures, ThisCapture);
8206     if (ThisCapture) {
8207       LValue ThisLVal =
8208           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8209       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8210       LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
8211       BasePointers.push_back(ThisLVal.getPointer());
8212       Pointers.push_back(ThisLValVal.getPointer());
8213       Sizes.push_back(
8214           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8215                                     CGF.Int64Ty, /*isSigned=*/true));
8216       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8217                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8218     }
8219     for (const LambdaCapture &LC : RD->captures()) {
8220       if (!LC.capturesVariable())
8221         continue;
8222       const VarDecl *VD = LC.getCapturedVar();
8223       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8224         continue;
8225       auto It = Captures.find(VD);
8226       assert(It != Captures.end() && "Found lambda capture without field.");
8227       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8228       if (LC.getCaptureKind() == LCK_ByRef) {
8229         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8230         LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
8231         BasePointers.push_back(VarLVal.getPointer());
8232         Pointers.push_back(VarLValVal.getPointer());
8233         Sizes.push_back(CGF.Builder.CreateIntCast(
8234             CGF.getTypeSize(
8235                 VD->getType().getCanonicalType().getNonReferenceType()),
8236             CGF.Int64Ty, /*isSigned=*/true));
8237       } else {
8238         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8239         LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
8240         BasePointers.push_back(VarLVal.getPointer());
8241         Pointers.push_back(VarRVal.getScalarVal());
8242         Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8243       }
8244       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8245                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8246     }
8247   }
8248 
8249   /// Set correct indices for lambdas captures.
8250   void adjustMemberOfForLambdaCaptures(
8251       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8252       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8253       MapFlagsArrayTy &Types) const {
8254     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8255       // Set correct member_of idx for all implicit lambda captures.
8256       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8257                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8258         continue;
8259       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8260       assert(BasePtr && "Unable to find base lambda address.");
8261       int TgtIdx = -1;
8262       for (unsigned J = I; J > 0; --J) {
8263         unsigned Idx = J - 1;
8264         if (Pointers[Idx] != BasePtr)
8265           continue;
8266         TgtIdx = Idx;
8267         break;
8268       }
8269       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8270       // All other current entries will be MEMBER_OF the combined entry
8271       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8272       // 0xFFFF in the MEMBER_OF field).
8273       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8274       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8275     }
8276   }
8277 
8278   /// Generate the base pointers, section pointers, sizes and map types
8279   /// associated to a given capture.
8280   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8281                               llvm::Value *Arg,
8282                               MapBaseValuesArrayTy &BasePointers,
8283                               MapValuesArrayTy &Pointers,
8284                               MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
8285                               StructRangeInfoTy &PartialStruct) const {
8286     assert(!Cap->capturesVariableArrayType() &&
8287            "Not expecting to generate map info for a variable array type!");
8288 
8289     // We need to know when we generating information for the first component
8290     const ValueDecl *VD = Cap->capturesThis()
8291                               ? nullptr
8292                               : Cap->getCapturedVar()->getCanonicalDecl();
8293 
8294     // If this declaration appears in a is_device_ptr clause we just have to
8295     // pass the pointer by value. If it is a reference to a declaration, we just
8296     // pass its value.
8297     if (DevPointersMap.count(VD)) {
8298       BasePointers.emplace_back(Arg, VD);
8299       Pointers.push_back(Arg);
8300       Sizes.push_back(
8301           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8302                                     CGF.Int64Ty, /*isSigned=*/true));
8303       Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
8304       return;
8305     }
8306 
8307     using MapData =
8308         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8309                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
8310     SmallVector<MapData, 4> DeclComponentLists;
8311     assert(CurDir.is<const OMPExecutableDirective *>() &&
8312            "Expect a executable directive");
8313     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8314     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8315       for (const auto &L : C->decl_component_lists(VD)) {
8316         assert(L.first == VD &&
8317                "We got information for the wrong declaration??");
8318         assert(!L.second.empty() &&
8319                "Not expecting declaration with no component lists.");
8320         DeclComponentLists.emplace_back(L.second, C->getMapType(),
8321                                         C->getMapTypeModifiers(),
8322                                         C->isImplicit());
8323       }
8324     }
8325 
8326     // Find overlapping elements (including the offset from the base element).
8327     llvm::SmallDenseMap<
8328         const MapData *,
8329         llvm::SmallVector<
8330             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8331         4>
8332         OverlappedData;
8333     size_t Count = 0;
8334     for (const MapData &L : DeclComponentLists) {
8335       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8336       OpenMPMapClauseKind MapType;
8337       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8338       bool IsImplicit;
8339       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8340       ++Count;
8341       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8342         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8343         std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
8344         auto CI = Components.rbegin();
8345         auto CE = Components.rend();
8346         auto SI = Components1.rbegin();
8347         auto SE = Components1.rend();
8348         for (; CI != CE && SI != SE; ++CI, ++SI) {
8349           if (CI->getAssociatedExpression()->getStmtClass() !=
8350               SI->getAssociatedExpression()->getStmtClass())
8351             break;
8352           // Are we dealing with different variables/fields?
8353           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8354             break;
8355         }
8356         // Found overlapping if, at least for one component, reached the head of
8357         // the components list.
8358         if (CI == CE || SI == SE) {
8359           assert((CI != CE || SI != SE) &&
8360                  "Unexpected full match of the mapping components.");
8361           const MapData &BaseData = CI == CE ? L : L1;
8362           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8363               SI == SE ? Components : Components1;
8364           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8365           OverlappedElements.getSecond().push_back(SubData);
8366         }
8367       }
8368     }
8369     // Sort the overlapped elements for each item.
8370     llvm::SmallVector<const FieldDecl *, 4> Layout;
8371     if (!OverlappedData.empty()) {
8372       if (const auto *CRD =
8373               VD->getType().getCanonicalType()->getAsCXXRecordDecl())
8374         getPlainLayout(CRD, Layout, /*AsBase=*/false);
8375       else {
8376         const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
8377         Layout.append(RD->field_begin(), RD->field_end());
8378       }
8379     }
8380     for (auto &Pair : OverlappedData) {
8381       llvm::sort(
8382           Pair.getSecond(),
8383           [&Layout](
8384               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8385               OMPClauseMappableExprCommon::MappableExprComponentListRef
8386                   Second) {
8387             auto CI = First.rbegin();
8388             auto CE = First.rend();
8389             auto SI = Second.rbegin();
8390             auto SE = Second.rend();
8391             for (; CI != CE && SI != SE; ++CI, ++SI) {
8392               if (CI->getAssociatedExpression()->getStmtClass() !=
8393                   SI->getAssociatedExpression()->getStmtClass())
8394                 break;
8395               // Are we dealing with different variables/fields?
8396               if (CI->getAssociatedDeclaration() !=
8397                   SI->getAssociatedDeclaration())
8398                 break;
8399             }
8400 
8401             // Lists contain the same elements.
8402             if (CI == CE && SI == SE)
8403               return false;
8404 
8405             // List with less elements is less than list with more elements.
8406             if (CI == CE || SI == SE)
8407               return CI == CE;
8408 
8409             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8410             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8411             if (FD1->getParent() == FD2->getParent())
8412               return FD1->getFieldIndex() < FD2->getFieldIndex();
8413             const auto It =
8414                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8415                   return FD == FD1 || FD == FD2;
8416                 });
8417             return *It == FD1;
8418           });
8419     }
8420 
8421     // Associated with a capture, because the mapping flags depend on it.
8422     // Go through all of the elements with the overlapped elements.
8423     for (const auto &Pair : OverlappedData) {
8424       const MapData &L = *Pair.getFirst();
8425       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8426       OpenMPMapClauseKind MapType;
8427       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8428       bool IsImplicit;
8429       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8430       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8431           OverlappedComponents = Pair.getSecond();
8432       bool IsFirstComponentList = true;
8433       generateInfoForComponentList(MapType, MapModifiers, Components,
8434                                    BasePointers, Pointers, Sizes, Types,
8435                                    PartialStruct, IsFirstComponentList,
8436                                    IsImplicit, OverlappedComponents);
8437     }
8438     // Go through other elements without overlapped elements.
8439     bool IsFirstComponentList = OverlappedData.empty();
8440     for (const MapData &L : DeclComponentLists) {
8441       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8442       OpenMPMapClauseKind MapType;
8443       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8444       bool IsImplicit;
8445       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8446       auto It = OverlappedData.find(&L);
8447       if (It == OverlappedData.end())
8448         generateInfoForComponentList(MapType, MapModifiers, Components,
8449                                      BasePointers, Pointers, Sizes, Types,
8450                                      PartialStruct, IsFirstComponentList,
8451                                      IsImplicit);
8452       IsFirstComponentList = false;
8453     }
8454   }
8455 
8456   /// Generate the base pointers, section pointers, sizes and map types
8457   /// associated with the declare target link variables.
8458   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8459                                         MapValuesArrayTy &Pointers,
8460                                         MapValuesArrayTy &Sizes,
8461                                         MapFlagsArrayTy &Types) const {
8462     assert(CurDir.is<const OMPExecutableDirective *>() &&
8463            "Expect a executable directive");
8464     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8465     // Map other list items in the map clause which are not captured variables
8466     // but "declare target link" global variables.
8467     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8468       for (const auto &L : C->component_lists()) {
8469         if (!L.first)
8470           continue;
8471         const auto *VD = dyn_cast<VarDecl>(L.first);
8472         if (!VD)
8473           continue;
8474         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8475             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8476         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8477             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8478           continue;
8479         StructRangeInfoTy PartialStruct;
8480         generateInfoForComponentList(
8481             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8482             Pointers, Sizes, Types, PartialStruct,
8483             /*IsFirstComponentList=*/true, C->isImplicit());
8484         assert(!PartialStruct.Base.isValid() &&
8485                "No partial structs for declare target link expected.");
8486       }
8487     }
8488   }
8489 
8490   /// Generate the default map information for a given capture \a CI,
8491   /// record field declaration \a RI and captured value \a CV.
8492   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8493                               const FieldDecl &RI, llvm::Value *CV,
8494                               MapBaseValuesArrayTy &CurBasePointers,
8495                               MapValuesArrayTy &CurPointers,
8496                               MapValuesArrayTy &CurSizes,
8497                               MapFlagsArrayTy &CurMapTypes) const {
8498     bool IsImplicit = true;
8499     // Do the default mapping.
8500     if (CI.capturesThis()) {
8501       CurBasePointers.push_back(CV);
8502       CurPointers.push_back(CV);
8503       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8504       CurSizes.push_back(
8505           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8506                                     CGF.Int64Ty, /*isSigned=*/true));
8507       // Default map type.
8508       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8509     } else if (CI.capturesVariableByCopy()) {
8510       CurBasePointers.push_back(CV);
8511       CurPointers.push_back(CV);
8512       if (!RI.getType()->isAnyPointerType()) {
8513         // We have to signal to the runtime captures passed by value that are
8514         // not pointers.
8515         CurMapTypes.push_back(OMP_MAP_LITERAL);
8516         CurSizes.push_back(CGF.Builder.CreateIntCast(
8517             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8518       } else {
8519         // Pointers are implicitly mapped with a zero size and no flags
8520         // (other than first map that is added for all implicit maps).
8521         CurMapTypes.push_back(OMP_MAP_NONE);
8522         CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8523       }
8524       const VarDecl *VD = CI.getCapturedVar();
8525       auto I = FirstPrivateDecls.find(VD);
8526       if (I != FirstPrivateDecls.end())
8527         IsImplicit = I->getSecond();
8528     } else {
8529       assert(CI.capturesVariable() && "Expected captured reference.");
8530       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8531       QualType ElementType = PtrTy->getPointeeType();
8532       CurSizes.push_back(CGF.Builder.CreateIntCast(
8533           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8534       // The default map type for a scalar/complex type is 'to' because by
8535       // default the value doesn't have to be retrieved. For an aggregate
8536       // type, the default is 'tofrom'.
8537       CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
8538       const VarDecl *VD = CI.getCapturedVar();
8539       auto I = FirstPrivateDecls.find(VD);
8540       if (I != FirstPrivateDecls.end() &&
8541           VD->getType().isConstant(CGF.getContext())) {
8542         llvm::Constant *Addr =
8543             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8544         // Copy the value of the original variable to the new global copy.
8545         CGF.Builder.CreateMemCpy(
8546             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
8547             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8548             CurSizes.back(), /*IsVolatile=*/false);
8549         // Use new global variable as the base pointers.
8550         CurBasePointers.push_back(Addr);
8551         CurPointers.push_back(Addr);
8552       } else {
8553         CurBasePointers.push_back(CV);
8554         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8555           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8556               CV, ElementType, CGF.getContext().getDeclAlign(VD),
8557               AlignmentSource::Decl));
8558           CurPointers.push_back(PtrAddr.getPointer());
8559         } else {
8560           CurPointers.push_back(CV);
8561         }
8562       }
8563       if (I != FirstPrivateDecls.end())
8564         IsImplicit = I->getSecond();
8565     }
8566     // Every default map produces a single argument which is a target parameter.
8567     CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
8568 
8569     // Add flag stating this is an implicit map.
8570     if (IsImplicit)
8571       CurMapTypes.back() |= OMP_MAP_IMPLICIT;
8572   }
8573 };
8574 } // anonymous namespace
8575 
8576 /// Emit the arrays used to pass the captures and map information to the
8577 /// offloading runtime library. If there is no map or capture information,
8578 /// return nullptr by reference.
8579 static void
8580 emitOffloadingArrays(CodeGenFunction &CGF,
8581                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
8582                      MappableExprsHandler::MapValuesArrayTy &Pointers,
8583                      MappableExprsHandler::MapValuesArrayTy &Sizes,
8584                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
8585                      CGOpenMPRuntime::TargetDataInfo &Info) {
8586   CodeGenModule &CGM = CGF.CGM;
8587   ASTContext &Ctx = CGF.getContext();
8588 
8589   // Reset the array information.
8590   Info.clearArrayInfo();
8591   Info.NumberOfPtrs = BasePointers.size();
8592 
8593   if (Info.NumberOfPtrs) {
8594     // Detect if we have any capture size requiring runtime evaluation of the
8595     // size so that a constant array could be eventually used.
8596     bool hasRuntimeEvaluationCaptureSize = false;
8597     for (llvm::Value *S : Sizes)
8598       if (!isa<llvm::Constant>(S)) {
8599         hasRuntimeEvaluationCaptureSize = true;
8600         break;
8601       }
8602 
8603     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8604     QualType PointerArrayType =
8605         Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
8606                                  /*IndexTypeQuals=*/0);
8607 
8608     Info.BasePointersArray =
8609         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8610     Info.PointersArray =
8611         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8612 
8613     // If we don't have any VLA types or other types that require runtime
8614     // evaluation, we can use a constant array for the map sizes, otherwise we
8615     // need to fill up the arrays as we do for the pointers.
8616     QualType Int64Ty =
8617         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8618     if (hasRuntimeEvaluationCaptureSize) {
8619       QualType SizeArrayType =
8620           Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal,
8621                                    /*IndexTypeQuals=*/0);
8622       Info.SizesArray =
8623           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8624     } else {
8625       // We expect all the sizes to be constant, so we collect them to create
8626       // a constant array.
8627       SmallVector<llvm::Constant *, 16> ConstSizes;
8628       for (llvm::Value *S : Sizes)
8629         ConstSizes.push_back(cast<llvm::Constant>(S));
8630 
8631       auto *SizesArrayInit = llvm::ConstantArray::get(
8632           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8633       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8634       auto *SizesArrayGbl = new llvm::GlobalVariable(
8635           CGM.getModule(), SizesArrayInit->getType(),
8636           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8637           SizesArrayInit, Name);
8638       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8639       Info.SizesArray = SizesArrayGbl;
8640     }
8641 
8642     // The map types are always constant so we don't need to generate code to
8643     // fill arrays. Instead, we create an array constant.
8644     SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8645     llvm::copy(MapTypes, Mapping.begin());
8646     llvm::Constant *MapTypesArrayInit =
8647         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8648     std::string MaptypesName =
8649         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8650     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8651         CGM.getModule(), MapTypesArrayInit->getType(),
8652         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8653         MapTypesArrayInit, MaptypesName);
8654     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8655     Info.MapTypesArray = MapTypesArrayGbl;
8656 
8657     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8658       llvm::Value *BPVal = *BasePointers[I];
8659       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8660           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8661           Info.BasePointersArray, 0, I);
8662       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8663           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8664       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8665       CGF.Builder.CreateStore(BPVal, BPAddr);
8666 
8667       if (Info.requiresDevicePointerInfo())
8668         if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8669           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8670 
8671       llvm::Value *PVal = Pointers[I];
8672       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8673           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8674           Info.PointersArray, 0, I);
8675       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8676           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8677       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8678       CGF.Builder.CreateStore(PVal, PAddr);
8679 
8680       if (hasRuntimeEvaluationCaptureSize) {
8681         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8682             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8683             Info.SizesArray,
8684             /*Idx0=*/0,
8685             /*Idx1=*/I);
8686         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8687         CGF.Builder.CreateStore(
8688             CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8689             SAddr);
8690       }
8691     }
8692   }
8693 }
8694 
8695 /// Emit the arguments to be passed to the runtime library based on the
8696 /// arrays of pointers, sizes and map types.
8697 static void emitOffloadingArraysArgument(
8698     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8699     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8700     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8701   CodeGenModule &CGM = CGF.CGM;
8702   if (Info.NumberOfPtrs) {
8703     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8704         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8705         Info.BasePointersArray,
8706         /*Idx0=*/0, /*Idx1=*/0);
8707     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8708         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8709         Info.PointersArray,
8710         /*Idx0=*/0,
8711         /*Idx1=*/0);
8712     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8713         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8714         /*Idx0=*/0, /*Idx1=*/0);
8715     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8716         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8717         Info.MapTypesArray,
8718         /*Idx0=*/0,
8719         /*Idx1=*/0);
8720   } else {
8721     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8722     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8723     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8724     MapTypesArrayArg =
8725         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8726   }
8727 }
8728 
8729 /// Check for inner distribute directive.
8730 static const OMPExecutableDirective *
8731 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8732   const auto *CS = D.getInnermostCapturedStmt();
8733   const auto *Body =
8734       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8735   const Stmt *ChildStmt =
8736       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8737 
8738   if (const auto *NestedDir =
8739           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8740     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8741     switch (D.getDirectiveKind()) {
8742     case OMPD_target:
8743       if (isOpenMPDistributeDirective(DKind))
8744         return NestedDir;
8745       if (DKind == OMPD_teams) {
8746         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8747             /*IgnoreCaptured=*/true);
8748         if (!Body)
8749           return nullptr;
8750         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8751         if (const auto *NND =
8752                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8753           DKind = NND->getDirectiveKind();
8754           if (isOpenMPDistributeDirective(DKind))
8755             return NND;
8756         }
8757       }
8758       return nullptr;
8759     case OMPD_target_teams:
8760       if (isOpenMPDistributeDirective(DKind))
8761         return NestedDir;
8762       return nullptr;
8763     case OMPD_target_parallel:
8764     case OMPD_target_simd:
8765     case OMPD_target_parallel_for:
8766     case OMPD_target_parallel_for_simd:
8767       return nullptr;
8768     case OMPD_target_teams_distribute:
8769     case OMPD_target_teams_distribute_simd:
8770     case OMPD_target_teams_distribute_parallel_for:
8771     case OMPD_target_teams_distribute_parallel_for_simd:
8772     case OMPD_parallel:
8773     case OMPD_for:
8774     case OMPD_parallel_for:
8775     case OMPD_parallel_sections:
8776     case OMPD_for_simd:
8777     case OMPD_parallel_for_simd:
8778     case OMPD_cancel:
8779     case OMPD_cancellation_point:
8780     case OMPD_ordered:
8781     case OMPD_threadprivate:
8782     case OMPD_allocate:
8783     case OMPD_task:
8784     case OMPD_simd:
8785     case OMPD_sections:
8786     case OMPD_section:
8787     case OMPD_single:
8788     case OMPD_master:
8789     case OMPD_critical:
8790     case OMPD_taskyield:
8791     case OMPD_barrier:
8792     case OMPD_taskwait:
8793     case OMPD_taskgroup:
8794     case OMPD_atomic:
8795     case OMPD_flush:
8796     case OMPD_teams:
8797     case OMPD_target_data:
8798     case OMPD_target_exit_data:
8799     case OMPD_target_enter_data:
8800     case OMPD_distribute:
8801     case OMPD_distribute_simd:
8802     case OMPD_distribute_parallel_for:
8803     case OMPD_distribute_parallel_for_simd:
8804     case OMPD_teams_distribute:
8805     case OMPD_teams_distribute_simd:
8806     case OMPD_teams_distribute_parallel_for:
8807     case OMPD_teams_distribute_parallel_for_simd:
8808     case OMPD_target_update:
8809     case OMPD_declare_simd:
8810     case OMPD_declare_target:
8811     case OMPD_end_declare_target:
8812     case OMPD_declare_reduction:
8813     case OMPD_declare_mapper:
8814     case OMPD_taskloop:
8815     case OMPD_taskloop_simd:
8816     case OMPD_requires:
8817     case OMPD_unknown:
8818       llvm_unreachable("Unexpected directive.");
8819     }
8820   }
8821 
8822   return nullptr;
8823 }
8824 
8825 /// Emit the user-defined mapper function. The code generation follows the
8826 /// pattern in the example below.
8827 /// \code
8828 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8829 ///                                           void *base, void *begin,
8830 ///                                           int64_t size, int64_t type) {
8831 ///   // Allocate space for an array section first.
8832 ///   if (size > 1 && !maptype.IsDelete)
8833 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8834 ///                                 size*sizeof(Ty), clearToFrom(type));
8835 ///   // Map members.
8836 ///   for (unsigned i = 0; i < size; i++) {
8837 ///     // For each component specified by this mapper:
8838 ///     for (auto c : all_components) {
8839 ///       if (c.hasMapper())
8840 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8841 ///                       c.arg_type);
8842 ///       else
8843 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
8844 ///                                     c.arg_begin, c.arg_size, c.arg_type);
8845 ///     }
8846 ///   }
8847 ///   // Delete the array section.
8848 ///   if (size > 1 && maptype.IsDelete)
8849 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8850 ///                                 size*sizeof(Ty), clearToFrom(type));
8851 /// }
8852 /// \endcode
8853 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
8854                                             CodeGenFunction *CGF) {
8855   if (UDMMap.count(D) > 0)
8856     return;
8857   ASTContext &C = CGM.getContext();
8858   QualType Ty = D->getType();
8859   QualType PtrTy = C.getPointerType(Ty).withRestrict();
8860   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
8861   auto *MapperVarDecl =
8862       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
8863   SourceLocation Loc = D->getLocation();
8864   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
8865 
8866   // Prepare mapper function arguments and attributes.
8867   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8868                               C.VoidPtrTy, ImplicitParamDecl::Other);
8869   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
8870                             ImplicitParamDecl::Other);
8871   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8872                              C.VoidPtrTy, ImplicitParamDecl::Other);
8873   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8874                             ImplicitParamDecl::Other);
8875   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8876                             ImplicitParamDecl::Other);
8877   FunctionArgList Args;
8878   Args.push_back(&HandleArg);
8879   Args.push_back(&BaseArg);
8880   Args.push_back(&BeginArg);
8881   Args.push_back(&SizeArg);
8882   Args.push_back(&TypeArg);
8883   const CGFunctionInfo &FnInfo =
8884       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
8885   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
8886   SmallString<64> TyStr;
8887   llvm::raw_svector_ostream Out(TyStr);
8888   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
8889   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
8890   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
8891                                     Name, &CGM.getModule());
8892   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
8893   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
8894   // Start the mapper function code generation.
8895   CodeGenFunction MapperCGF(CGM);
8896   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
8897   // Compute the starting and end addreses of array elements.
8898   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
8899       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
8900       C.getPointerType(Int64Ty), Loc);
8901   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
8902       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
8903       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
8904   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
8905   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
8906       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
8907       C.getPointerType(Int64Ty), Loc);
8908   // Prepare common arguments for array initiation and deletion.
8909   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
8910       MapperCGF.GetAddrOfLocalVar(&HandleArg),
8911       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8912   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
8913       MapperCGF.GetAddrOfLocalVar(&BaseArg),
8914       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8915   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
8916       MapperCGF.GetAddrOfLocalVar(&BeginArg),
8917       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8918 
8919   // Emit array initiation if this is an array section and \p MapType indicates
8920   // that memory allocation is required.
8921   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
8922   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
8923                              ElementSize, HeadBB, /*IsInit=*/true);
8924 
8925   // Emit a for loop to iterate through SizeArg of elements and map all of them.
8926 
8927   // Emit the loop header block.
8928   MapperCGF.EmitBlock(HeadBB);
8929   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
8930   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
8931   // Evaluate whether the initial condition is satisfied.
8932   llvm::Value *IsEmpty =
8933       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
8934   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
8935   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
8936 
8937   // Emit the loop body block.
8938   MapperCGF.EmitBlock(BodyBB);
8939   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
8940       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
8941   PtrPHI->addIncoming(PtrBegin, EntryBB);
8942   Address PtrCurrent =
8943       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
8944                           .getAlignment()
8945                           .alignmentOfArrayElement(ElementSize));
8946   // Privatize the declared variable of mapper to be the current array element.
8947   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
8948   Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
8949     return MapperCGF
8950         .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
8951         .getAddress();
8952   });
8953   (void)Scope.Privatize();
8954 
8955   // Get map clause information. Fill up the arrays with all mapped variables.
8956   MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
8957   MappableExprsHandler::MapValuesArrayTy Pointers;
8958   MappableExprsHandler::MapValuesArrayTy Sizes;
8959   MappableExprsHandler::MapFlagsArrayTy MapTypes;
8960   MappableExprsHandler MEHandler(*D, MapperCGF);
8961   MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
8962 
8963   // Call the runtime API __tgt_mapper_num_components to get the number of
8964   // pre-existing components.
8965   llvm::Value *OffloadingArgs[] = {Handle};
8966   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
8967       createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
8968   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
8969       PreviousSize,
8970       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
8971 
8972   // Fill up the runtime mapper handle for all components.
8973   for (unsigned I = 0; I < BasePointers.size(); ++I) {
8974     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
8975         *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
8976     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
8977         Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
8978     llvm::Value *CurSizeArg = Sizes[I];
8979 
8980     // Extract the MEMBER_OF field from the map type.
8981     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
8982     MapperCGF.EmitBlock(MemberBB);
8983     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
8984     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
8985         OriMapType,
8986         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
8987     llvm::BasicBlock *MemberCombineBB =
8988         MapperCGF.createBasicBlock("omp.member.combine");
8989     llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
8990     llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
8991     MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
8992     // Add the number of pre-existing components to the MEMBER_OF field if it
8993     // is valid.
8994     MapperCGF.EmitBlock(MemberCombineBB);
8995     llvm::Value *CombinedMember =
8996         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
8997     // Do nothing if it is not a member of previous components.
8998     MapperCGF.EmitBlock(TypeBB);
8999     llvm::PHINode *MemberMapType =
9000         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
9001     MemberMapType->addIncoming(OriMapType, MemberBB);
9002     MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
9003 
9004     // Combine the map type inherited from user-defined mapper with that
9005     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9006     // bits of the \a MapType, which is the input argument of the mapper
9007     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9008     // bits of MemberMapType.
9009     // [OpenMP 5.0], 1.2.6. map-type decay.
9010     //        | alloc |  to   | from  | tofrom | release | delete
9011     // ----------------------------------------------------------
9012     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9013     // to     | alloc |  to   | alloc |   to   | release | delete
9014     // from   | alloc | alloc | from  |  from  | release | delete
9015     // tofrom | alloc |  to   | from  | tofrom | release | delete
9016     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9017         MapType,
9018         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9019                                    MappableExprsHandler::OMP_MAP_FROM));
9020     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9021     llvm::BasicBlock *AllocElseBB =
9022         MapperCGF.createBasicBlock("omp.type.alloc.else");
9023     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9024     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9025     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9026     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9027     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9028     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9029     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9030     MapperCGF.EmitBlock(AllocBB);
9031     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9032         MemberMapType,
9033         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9034                                      MappableExprsHandler::OMP_MAP_FROM)));
9035     MapperCGF.Builder.CreateBr(EndBB);
9036     MapperCGF.EmitBlock(AllocElseBB);
9037     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9038         LeftToFrom,
9039         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9040     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9041     // In case of to, clear OMP_MAP_FROM.
9042     MapperCGF.EmitBlock(ToBB);
9043     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9044         MemberMapType,
9045         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9046     MapperCGF.Builder.CreateBr(EndBB);
9047     MapperCGF.EmitBlock(ToElseBB);
9048     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9049         LeftToFrom,
9050         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9051     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9052     // In case of from, clear OMP_MAP_TO.
9053     MapperCGF.EmitBlock(FromBB);
9054     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9055         MemberMapType,
9056         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9057     // In case of tofrom, do nothing.
9058     MapperCGF.EmitBlock(EndBB);
9059     llvm::PHINode *CurMapType =
9060         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9061     CurMapType->addIncoming(AllocMapType, AllocBB);
9062     CurMapType->addIncoming(ToMapType, ToBB);
9063     CurMapType->addIncoming(FromMapType, FromBB);
9064     CurMapType->addIncoming(MemberMapType, ToElseBB);
9065 
9066     // TODO: call the corresponding mapper function if a user-defined mapper is
9067     // associated with this map clause.
9068     // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9069     // data structure.
9070     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9071                                      CurSizeArg, CurMapType};
9072     MapperCGF.EmitRuntimeCall(
9073         createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
9074         OffloadingArgs);
9075   }
9076 
9077   // Update the pointer to point to the next element that needs to be mapped,
9078   // and check whether we have mapped all elements.
9079   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9080       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9081   PtrPHI->addIncoming(PtrNext, BodyBB);
9082   llvm::Value *IsDone =
9083       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9084   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9085   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9086 
9087   MapperCGF.EmitBlock(ExitBB);
9088   // Emit array deletion if this is an array section and \p MapType indicates
9089   // that deletion is required.
9090   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9091                              ElementSize, DoneBB, /*IsInit=*/false);
9092 
9093   // Emit the function exit block.
9094   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9095   MapperCGF.FinishFunction();
9096   UDMMap.try_emplace(D, Fn);
9097   if (CGF) {
9098     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9099     Decls.second.push_back(D);
9100   }
9101 }
9102 
9103 /// Emit the array initialization or deletion portion for user-defined mapper
9104 /// code generation. First, it evaluates whether an array section is mapped and
9105 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9106 /// true, and \a MapType indicates to not delete this array, array
9107 /// initialization code is generated. If \a IsInit is false, and \a MapType
9108 /// indicates to not this array, array deletion code is generated.
9109 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9110     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9111     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9112     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9113   StringRef Prefix = IsInit ? ".init" : ".del";
9114 
9115   // Evaluate if this is an array section.
9116   llvm::BasicBlock *IsDeleteBB =
9117       MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete");
9118   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix);
9119   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9120       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9121   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9122 
9123   // Evaluate if we are going to delete this section.
9124   MapperCGF.EmitBlock(IsDeleteBB);
9125   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9126       MapType,
9127       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9128   llvm::Value *DeleteCond;
9129   if (IsInit) {
9130     DeleteCond = MapperCGF.Builder.CreateIsNull(
9131         DeleteBit, "omp.array" + Prefix + ".delete");
9132   } else {
9133     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9134         DeleteBit, "omp.array" + Prefix + ".delete");
9135   }
9136   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9137 
9138   MapperCGF.EmitBlock(BodyBB);
9139   // Get the array size by multiplying element size and element number (i.e., \p
9140   // Size).
9141   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9142       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9143   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9144   // memory allocation/deletion purpose only.
9145   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9146       MapType,
9147       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9148                                    MappableExprsHandler::OMP_MAP_FROM)));
9149   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9150   // data structure.
9151   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9152   MapperCGF.EmitRuntimeCall(
9153       createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
9154 }
9155 
9156 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9157     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
9158     const llvm::function_ref<llvm::Value *(
9159         CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
9160   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9161   const OMPExecutableDirective *TD = &D;
9162   // Get nested teams distribute kind directive, if any.
9163   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9164     TD = getNestedDistributeDirective(CGM.getContext(), D);
9165   if (!TD)
9166     return;
9167   const auto *LD = cast<OMPLoopDirective>(TD);
9168   auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
9169                                                      PrePostActionTy &) {
9170     llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
9171 
9172     // Emit device ID if any.
9173     llvm::Value *DeviceID;
9174     if (Device)
9175       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9176                                            CGF.Int64Ty, /*isSigned=*/true);
9177     else
9178       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9179 
9180     llvm::Value *Args[] = {DeviceID, NumIterations};
9181     CGF.EmitRuntimeCall(
9182         createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
9183   };
9184   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9185 }
9186 
/// Emits the host-side code that launches the target region of directive
/// \a D.
///
/// When \a OutlinedFnID is non-null, the generated code fills the offloading
/// arrays from the captures of the region and calls one of the __tgt_target*
/// runtime entry points; if that call returns a non-null value, the host
/// version \a OutlinedFn is called instead. When \a OutlinedFnID is null, only
/// the call to \a OutlinedFn is emitted. If \a IfCond is provided (and an ID
/// is available), both paths are handed to emitOMPIfClause together with the
/// condition.
///
/// \param CGF Function being generated. Nothing is emitted if it has no
/// insert point.
/// \param D Target directive being emitted.
/// \param OutlinedFn Host version of the target region; must be non-null.
/// \param OutlinedFnID Identifier of the target region passed to the runtime,
/// or null if the region must only be executed on the host.
/// \param IfCond Expression of the 'if' clause, or null.
/// \param Device Expression of the 'device' clause, or null, in which case
/// OMP_DEVICEID_UNDEF is passed to the runtime.
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Function *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // With a 'depend' clause the region is emitted through
  // EmitOMPTargetTaskBasedDirective instead of as an inlined directive (see
  // TargetThenGen/TargetElseGen below).
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  // Values of the variables captured by the 'target' region. They are used
  // both to build the offloading arrays and as arguments of the host call.
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled in by TargetThenGen and read by
  // ThenGen, which captures them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // A 'nowait' clause selects the *_nowait variants of the runtime calls.
    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    // A non-null return value of the runtime call is treated as a failure.
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    // With an outer task the captured values are regenerated at this point
    // rather than reusing the ones produced before the task was emitted.
    // NOTE(review): presumably because the earlier values are not usable
    // inside the task function - confirm.
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    // Same regeneration of the captured values as in the failure path of
    // ThenGen.
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Offloading path: collects the map information for every capture, emits
  // the offloading arrays, publishes them through InputInfo/MapTypesArray and
  // finally emits ThenGen either inside a task or as an inlined directive.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk the captures, the fields of the captured record and the captured
    // values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the emitted arrays to ThenGen through the variables it captured
    // by reference.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: emits ElseGen either inside a task (with an empty
  // OMPTargetDataInfo) or as an inlined directive.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9459 
9460 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9461                                                     StringRef ParentName) {
9462   if (!S)
9463     return;
9464 
9465   // Codegen OMP target directives that offload compute to the device.
9466   bool RequiresDeviceCodegen =
9467       isa<OMPExecutableDirective>(S) &&
9468       isOpenMPTargetExecutionDirective(
9469           cast<OMPExecutableDirective>(S)->getDirectiveKind());
9470 
9471   if (RequiresDeviceCodegen) {
9472     const auto &E = *cast<OMPExecutableDirective>(S);
9473     unsigned DeviceID;
9474     unsigned FileID;
9475     unsigned Line;
9476     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
9477                              FileID, Line);
9478 
9479     // Is this a target region that should not be emitted as an entry point? If
9480     // so just signal we are done with this target region.
9481     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
9482                                                             ParentName, Line))
9483       return;
9484 
9485     switch (E.getDirectiveKind()) {
9486     case OMPD_target:
9487       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9488                                                    cast<OMPTargetDirective>(E));
9489       break;
9490     case OMPD_target_parallel:
9491       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9492           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9493       break;
9494     case OMPD_target_teams:
9495       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9496           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9497       break;
9498     case OMPD_target_teams_distribute:
9499       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9500           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9501       break;
9502     case OMPD_target_teams_distribute_simd:
9503       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9504           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9505       break;
9506     case OMPD_target_parallel_for:
9507       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9508           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9509       break;
9510     case OMPD_target_parallel_for_simd:
9511       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9512           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9513       break;
9514     case OMPD_target_simd:
9515       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9516           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9517       break;
9518     case OMPD_target_teams_distribute_parallel_for:
9519       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9520           CGM, ParentName,
9521           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9522       break;
9523     case OMPD_target_teams_distribute_parallel_for_simd:
9524       CodeGenFunction::
9525           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9526               CGM, ParentName,
9527               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9528       break;
9529     case OMPD_parallel:
9530     case OMPD_for:
9531     case OMPD_parallel_for:
9532     case OMPD_parallel_sections:
9533     case OMPD_for_simd:
9534     case OMPD_parallel_for_simd:
9535     case OMPD_cancel:
9536     case OMPD_cancellation_point:
9537     case OMPD_ordered:
9538     case OMPD_threadprivate:
9539     case OMPD_allocate:
9540     case OMPD_task:
9541     case OMPD_simd:
9542     case OMPD_sections:
9543     case OMPD_section:
9544     case OMPD_single:
9545     case OMPD_master:
9546     case OMPD_critical:
9547     case OMPD_taskyield:
9548     case OMPD_barrier:
9549     case OMPD_taskwait:
9550     case OMPD_taskgroup:
9551     case OMPD_atomic:
9552     case OMPD_flush:
9553     case OMPD_teams:
9554     case OMPD_target_data:
9555     case OMPD_target_exit_data:
9556     case OMPD_target_enter_data:
9557     case OMPD_distribute:
9558     case OMPD_distribute_simd:
9559     case OMPD_distribute_parallel_for:
9560     case OMPD_distribute_parallel_for_simd:
9561     case OMPD_teams_distribute:
9562     case OMPD_teams_distribute_simd:
9563     case OMPD_teams_distribute_parallel_for:
9564     case OMPD_teams_distribute_parallel_for_simd:
9565     case OMPD_target_update:
9566     case OMPD_declare_simd:
9567     case OMPD_declare_target:
9568     case OMPD_end_declare_target:
9569     case OMPD_declare_reduction:
9570     case OMPD_declare_mapper:
9571     case OMPD_taskloop:
9572     case OMPD_taskloop_simd:
9573     case OMPD_requires:
9574     case OMPD_unknown:
9575       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9576     }
9577     return;
9578   }
9579 
9580   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9581     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9582       return;
9583 
9584     scanForTargetRegionsFunctions(
9585         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9586     return;
9587   }
9588 
9589   // If this is a lambda function, look into its body.
9590   if (const auto *L = dyn_cast<LambdaExpr>(S))
9591     S = L->getBody();
9592 
9593   // Keep looking for target regions recursively.
9594   for (const Stmt *II : S->children())
9595     scanForTargetRegionsFunctions(II, ParentName);
9596 }
9597 
9598 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9599   // If emitting code for the host, we do not process FD here. Instead we do
9600   // the normal code generation.
9601   if (!CGM.getLangOpts().OpenMPIsDevice)
9602     return false;
9603 
9604   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9605   StringRef Name = CGM.getMangledName(GD);
9606   // Try to detect target regions in the function.
9607   if (const auto *FD = dyn_cast<FunctionDecl>(VD))
9608     scanForTargetRegionsFunctions(FD->getBody(), Name);
9609 
9610   // Do not to emit function if it is not marked as declare target.
9611   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9612          AlreadyEmittedTargetFunctions.count(Name) == 0;
9613 }
9614 
9615 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9616   if (!CGM.getLangOpts().OpenMPIsDevice)
9617     return false;
9618 
9619   // Check if there are Ctors/Dtors in this declaration and look for target
9620   // regions in it. We use the complete variant to produce the kernel name
9621   // mangling.
9622   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9623   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9624     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9625       StringRef ParentName =
9626           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9627       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9628     }
9629     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9630       StringRef ParentName =
9631           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9632       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9633     }
9634   }
9635 
9636   // Do not to emit variable if it is not marked as declare target.
9637   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9638       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9639           cast<VarDecl>(GD.getDecl()));
9640   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9641       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9642        HasRequiresUnifiedSharedMemory)) {
9643     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9644     return true;
9645   }
9646   return false;
9647 }
9648 
9649 llvm::Constant *
9650 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9651                                                 const VarDecl *VD) {
9652   assert(VD->getType().isConstant(CGM.getContext()) &&
9653          "Expected constant variable.");
9654   StringRef VarName;
9655   llvm::Constant *Addr;
9656   llvm::GlobalValue::LinkageTypes Linkage;
9657   QualType Ty = VD->getType();
9658   SmallString<128> Buffer;
9659   {
9660     unsigned DeviceID;
9661     unsigned FileID;
9662     unsigned Line;
9663     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9664                              FileID, Line);
9665     llvm::raw_svector_ostream OS(Buffer);
9666     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9667        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9668     VarName = OS.str();
9669   }
9670   Linkage = llvm::GlobalValue::InternalLinkage;
9671   Addr =
9672       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9673                                   getDefaultFirstprivateAddressSpace());
9674   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9675   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9676   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9677   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9678       VarName, Addr, VarSize,
9679       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9680   return Addr;
9681 }
9682 
9683 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9684                                                    llvm::Constant *Addr) {
9685   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9686       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9687   if (!Res) {
9688     if (CGM.getLangOpts().OpenMPIsDevice) {
9689       // Register non-target variables being emitted in device code (debug info
9690       // may cause this).
9691       StringRef VarName = CGM.getMangledName(VD);
9692       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9693     }
9694     return;
9695   }
9696   // Register declare target variables.
9697   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9698   StringRef VarName;
9699   CharUnits VarSize;
9700   llvm::GlobalValue::LinkageTypes Linkage;
9701 
9702   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9703       !HasRequiresUnifiedSharedMemory) {
9704     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9705     VarName = CGM.getMangledName(VD);
9706     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9707       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9708       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9709     } else {
9710       VarSize = CharUnits::Zero();
9711     }
9712     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9713     // Temp solution to prevent optimizations of the internal variables.
9714     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9715       std::string RefName = getName({VarName, "ref"});
9716       if (!CGM.GetGlobalValue(RefName)) {
9717         llvm::Constant *AddrRef =
9718             getOrCreateInternalVariable(Addr->getType(), RefName);
9719         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9720         GVAddrRef->setConstant(/*Val=*/true);
9721         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9722         GVAddrRef->setInitializer(Addr);
9723         CGM.addCompilerUsedGlobal(GVAddrRef);
9724       }
9725     }
9726   } else {
9727     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9728             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9729              HasRequiresUnifiedSharedMemory)) &&
9730            "Declare target attribute must link or to with unified memory.");
9731     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9732       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9733     else
9734       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9735 
9736     if (CGM.getLangOpts().OpenMPIsDevice) {
9737       VarName = Addr->getName();
9738       Addr = nullptr;
9739     } else {
9740       VarName = getAddrOfDeclareTargetVar(VD).getName();
9741       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9742     }
9743     VarSize = CGM.getPointerSize();
9744     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9745   }
9746 
9747   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9748       VarName, Addr, VarSize, Flags, Linkage);
9749 }
9750 
9751 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9752   if (isa<FunctionDecl>(GD.getDecl()) ||
9753       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9754     return emitTargetFunctions(GD);
9755 
9756   return emitTargetGlobalVariable(GD);
9757 }
9758 
9759 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9760   for (const VarDecl *VD : DeferredGlobalVariables) {
9761     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9762         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9763     if (!Res)
9764       continue;
9765     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9766         !HasRequiresUnifiedSharedMemory) {
9767       CGM.EmitGlobal(VD);
9768     } else {
9769       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9770               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9771                HasRequiresUnifiedSharedMemory)) &&
9772              "Expected link clause or to clause with unified memory.");
9773       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9774     }
9775   }
9776 }
9777 
9778 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9779     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9780   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9781          " Expected target-based directive.");
9782 }
9783 
9784 void CGOpenMPRuntime::checkArchForUnifiedAddressing(
9785     const OMPRequiresDecl *D) {
9786   for (const OMPClause *Clause : D->clauselists()) {
9787     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9788       HasRequiresUnifiedSharedMemory = true;
9789       break;
9790     }
9791   }
9792 }
9793 
9794 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9795                                                        LangAS &AS) {
9796   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9797     return false;
9798   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9799   switch(A->getAllocatorType()) {
9800   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9801   // Not supported, fallback to the default mem space.
9802   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9803   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9804   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9805   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9806   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9807   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9808   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9809     AS = LangAS::Default;
9810     return true;
9811   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9812     llvm_unreachable("Expected predefined allocator for the variables with the "
9813                      "static storage.");
9814   }
9815   return false;
9816 }
9817 
/// Returns the HasRequiresUnifiedSharedMemory flag, which is set by
/// checkArchForUnifiedAddressing when a requires declaration has a
/// 'unified_shared_memory' clause.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
9821 
9822 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9823     CodeGenModule &CGM)
9824     : CGM(CGM) {
9825   if (CGM.getLangOpts().OpenMPIsDevice) {
9826     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9827     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9828   }
9829 }
9830 
9831 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9832   if (CGM.getLangOpts().OpenMPIsDevice)
9833     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9834 }
9835 
9836 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9837   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9838     return true;
9839 
9840   StringRef Name = CGM.getMangledName(GD);
9841   const auto *D = cast<FunctionDecl>(GD.getDecl());
9842   // Do not to emit function if it is marked as declare target as it was already
9843   // emitted.
9844   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9845     if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
9846       if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
9847         return !F->isDeclaration();
9848       return false;
9849     }
9850     return true;
9851   }
9852 
9853   return !AlreadyEmittedTargetFunctions.insert(Name).second;
9854 }
9855 
9856 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
9857   // If we don't have entries or if we are emitting code for the device, we
9858   // don't need to do anything.
9859   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
9860       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
9861       (OffloadEntriesInfoManager.empty() &&
9862        !HasEmittedDeclareTargetRegion &&
9863        !HasEmittedTargetRegion))
9864     return nullptr;
9865 
9866   // Create and register the function that handles the requires directives.
9867   ASTContext &C = CGM.getContext();
9868 
9869   llvm::Function *RequiresRegFn;
9870   {
9871     CodeGenFunction CGF(CGM);
9872     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
9873     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
9874     std::string ReqName = getName({"omp_offloading", "requires_reg"});
9875     RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
9876     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
9877     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
9878     // TODO: check for other requires clauses.
9879     // The requires directive takes effect only when a target region is
9880     // present in the compilation unit. Otherwise it is ignored and not
9881     // passed to the runtime. This avoids the runtime from throwing an error
9882     // for mismatching requires clauses across compilation units that don't
9883     // contain at least 1 target region.
9884     assert((HasEmittedTargetRegion ||
9885             HasEmittedDeclareTargetRegion ||
9886             !OffloadEntriesInfoManager.empty()) &&
9887            "Target or declare target region expected.");
9888     if (HasRequiresUnifiedSharedMemory)
9889       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
9890     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
9891         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
9892     CGF.FinishFunction();
9893   }
9894   return RequiresRegFn;
9895 }
9896 
9897 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
9898   // If we have offloading in the current module, we need to emit the entries
9899   // now and register the offloading descriptor.
9900   createOffloadEntriesAndInfoMetadata();
9901 
9902   // Create and register the offloading binary descriptors. This is the main
9903   // entity that captures all the information about offloading in the current
9904   // compilation unit.
9905   return createOffloadingBinaryDescriptorRegistration();
9906 }
9907 
9908 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9909                                     const OMPExecutableDirective &D,
9910                                     SourceLocation Loc,
9911                                     llvm::Function *OutlinedFn,
9912                                     ArrayRef<llvm::Value *> CapturedVars) {
9913   if (!CGF.HaveInsertPoint())
9914     return;
9915 
9916   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9917   CodeGenFunction::RunCleanupsScope Scope(CGF);
9918 
9919   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9920   llvm::Value *Args[] = {
9921       RTLoc,
9922       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9923       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9924   llvm::SmallVector<llvm::Value *, 16> RealArgs;
9925   RealArgs.append(std::begin(Args), std::end(Args));
9926   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9927 
9928   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9929   CGF.EmitRuntimeCall(RTLFn, RealArgs);
9930 }
9931 
9932 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9933                                          const Expr *NumTeams,
9934                                          const Expr *ThreadLimit,
9935                                          SourceLocation Loc) {
9936   if (!CGF.HaveInsertPoint())
9937     return;
9938 
9939   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9940 
9941   llvm::Value *NumTeamsVal =
9942       NumTeams
9943           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9944                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9945           : CGF.Builder.getInt32(0);
9946 
9947   llvm::Value *ThreadLimitVal =
9948       ThreadLimit
9949           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9950                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9951           : CGF.Builder.getInt32(0);
9952 
9953   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
9954   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9955                                      ThreadLimitVal};
9956   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
9957                       PushNumTeamsArgs);
9958 }
9959 
9960 void CGOpenMPRuntime::emitTargetDataCalls(
9961     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
9962     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
9963   if (!CGF.HaveInsertPoint())
9964     return;
9965 
9966   // Action used to replace the default codegen action and turn privatization
9967   // off.
9968   PrePostActionTy NoPrivAction;
9969 
9970   // Generate the code for the opening of the data environment. Capture all the
9971   // arguments of the runtime call by reference because they are used in the
9972   // closing of the region.
9973   auto &&BeginThenGen = [this, &D, Device, &Info,
9974                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
9975     // Fill up the arrays with all the mapped variables.
9976     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9977     MappableExprsHandler::MapValuesArrayTy Pointers;
9978     MappableExprsHandler::MapValuesArrayTy Sizes;
9979     MappableExprsHandler::MapFlagsArrayTy MapTypes;
9980 
9981     // Get map clause information.
9982     MappableExprsHandler MCHandler(D, CGF);
9983     MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
9984 
9985     // Fill up the arrays and create the arguments.
9986     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9987 
9988     llvm::Value *BasePointersArrayArg = nullptr;
9989     llvm::Value *PointersArrayArg = nullptr;
9990     llvm::Value *SizesArrayArg = nullptr;
9991     llvm::Value *MapTypesArrayArg = nullptr;
9992     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
9993                                  SizesArrayArg, MapTypesArrayArg, Info);
9994 
9995     // Emit device ID if any.
9996     llvm::Value *DeviceID = nullptr;
9997     if (Device) {
9998       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9999                                            CGF.Int64Ty, /*isSigned=*/true);
10000     } else {
10001       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10002     }
10003 
10004     // Emit the number of elements in the offloading arrays.
10005     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10006 
10007     llvm::Value *OffloadingArgs[] = {
10008         DeviceID,         PointerNum,    BasePointersArrayArg,
10009         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10010     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
10011                         OffloadingArgs);
10012 
10013     // If device pointer privatization is required, emit the body of the region
10014     // here. It will have to be duplicated: with and without privatization.
10015     if (!Info.CaptureDeviceAddrMap.empty())
10016       CodeGen(CGF);
10017   };
10018 
10019   // Generate code for the closing of the data region.
10020   auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
10021                                             PrePostActionTy &) {
10022     assert(Info.isValid() && "Invalid data environment closing arguments.");
10023 
10024     llvm::Value *BasePointersArrayArg = nullptr;
10025     llvm::Value *PointersArrayArg = nullptr;
10026     llvm::Value *SizesArrayArg = nullptr;
10027     llvm::Value *MapTypesArrayArg = nullptr;
10028     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10029                                  SizesArrayArg, MapTypesArrayArg, Info);
10030 
10031     // Emit device ID if any.
10032     llvm::Value *DeviceID = nullptr;
10033     if (Device) {
10034       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10035                                            CGF.Int64Ty, /*isSigned=*/true);
10036     } else {
10037       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10038     }
10039 
10040     // Emit the number of elements in the offloading arrays.
10041     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10042 
10043     llvm::Value *OffloadingArgs[] = {
10044         DeviceID,         PointerNum,    BasePointersArrayArg,
10045         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10046     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
10047                         OffloadingArgs);
10048   };
10049 
10050   // If we need device pointer privatization, we need to emit the body of the
10051   // region with no privatization in the 'else' branch of the conditional.
10052   // Otherwise, we don't have to do anything.
10053   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
10054                                                          PrePostActionTy &) {
10055     if (!Info.CaptureDeviceAddrMap.empty()) {
10056       CodeGen.setAction(NoPrivAction);
10057       CodeGen(CGF);
10058     }
10059   };
10060 
10061   // We don't have to do anything to close the region if the if clause evaluates
10062   // to false.
10063   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
10064 
10065   if (IfCond) {
10066     emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
10067   } else {
10068     RegionCodeGenTy RCG(BeginThenGen);
10069     RCG(CGF);
10070   }
10071 
10072   // If we don't require privatization of device pointers, we emit the body in
10073   // between the runtime calls. This avoids duplicating the body code.
10074   if (Info.CaptureDeviceAddrMap.empty()) {
10075     CodeGen.setAction(NoPrivAction);
10076     CodeGen(CGF);
10077   }
10078 
10079   if (IfCond) {
10080     emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
10081   } else {
10082     RegionCodeGenTy RCG(EndThenGen);
10083     RCG(CGF);
10084   }
10085 }
10086 
/// Emit the runtime call for a standalone target data directive
/// ('target enter data', 'target exit data' or 'target update').
///
/// \param CGF    Function being emitted; nothing is done when it has no
///               insertion point.
/// \param D      The directive; its kind and 'nowait' clause select the
///               runtime entry point, its 'depend' clauses select task-based
///               emission.
/// \param IfCond Optional 'if' clause condition; when present the call is
///               emitted under emitOMPIfClause with an empty 'else' side.
/// \param Device Optional 'device' clause expression; OMP_DEVICEID_UNDEF is
///               passed when absent.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // State shared (by reference) between the two lambdas below: TargetThenGen
  // fills it in before it hands ThenGen to the directive emitter, and ThenGen
  // reads it when building the runtime call operands.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Operands of the runtime call: device id, element count, then the base
    // pointers, pointers, sizes and map types arrays.
    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    // Every directive kind is listed explicitly (there is no 'default'
    // label); all kinds other than the three standalone data directives are
    // unreachable here.
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  // Builds the offloading arrays, publishes them through InputInfo and
  // MapTypesArray, and then emits ThenGen either as a target task (when the
  // directive has 'depend' clauses) or as an inlined directive.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Copy the results into the state ThenGen reads; the array addresses are
    // given pointer alignment.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an 'if' clause, the 'else' side emits nothing.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, TargetThenGen,
                    [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10239 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  ///
  /// The kind selects the token the name manglers in this file print for the
  /// parameter: 's' (x86) or "ls" (AArch64) for LinearWithVarStride, 'l' for
  /// Linear, 'u' for Uniform and 'v' for Vector.
  ///
  /// Note that LinearWithVarStride is the first enumerator, so a
  /// value-initialized ParamKindTy (e.g. `{}`) is LinearWithVarStride, not
  /// Vector.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; Vector unless set otherwise.
    ParamKindTy Kind = Vector;
    /// Value printed after the linear token by the manglers (the x86 mangler
    /// omits a zero value for Linear; the AArch64 one omits zero and one).
    llvm::APSInt StrideOrArg;
    /// When non-zero, printed by the manglers as 'a' followed by the value.
    llvm::APSInt Alignment;
  };
} // namespace
10250 
10251 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10252                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10253   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10254   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10255   // of that clause. The VLEN value must be power of 2.
10256   // In other case the notion of the function`s "characteristic data type" (CDT)
10257   // is used to compute the vector length.
10258   // CDT is defined in the following order:
10259   //   a) For non-void function, the CDT is the return type.
10260   //   b) If the function has any non-uniform, non-linear parameters, then the
10261   //   CDT is the type of the first such parameter.
10262   //   c) If the CDT determined by a) or b) above is struct, union, or class
10263   //   type which is pass-by-value (except for the type that maps to the
10264   //   built-in complex data type), the characteristic data type is int.
10265   //   d) If none of the above three cases is applicable, the CDT is int.
10266   // The VLEN is then determined based on the CDT and the size of vector
10267   // register of that ISA for which current vector version is generated. The
10268   // VLEN is computed using the formula below:
10269   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10270   // where vector register size specified in section 3.2.1 Registers and the
10271   // Stack Frame of original AMD64 ABI document.
10272   QualType RetType = FD->getReturnType();
10273   if (RetType.isNull())
10274     return 0;
10275   ASTContext &C = FD->getASTContext();
10276   QualType CDT;
10277   if (!RetType.isNull() && !RetType->isVoidType()) {
10278     CDT = RetType;
10279   } else {
10280     unsigned Offset = 0;
10281     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10282       if (ParamAttrs[Offset].Kind == Vector)
10283         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10284       ++Offset;
10285     }
10286     if (CDT.isNull()) {
10287       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10288         if (ParamAttrs[I + Offset].Kind == Vector) {
10289           CDT = FD->getParamDecl(I)->getType();
10290           break;
10291         }
10292       }
10293     }
10294   }
10295   if (CDT.isNull())
10296     CDT = C.IntTy;
10297   CDT = CDT->getCanonicalTypeUnqualified();
10298   if (CDT->isRecordType() || CDT->isUnionType())
10299     CDT = C.IntTy;
10300   return C.getTypeSize(CDT);
10301 }
10302 
10303 static void
10304 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10305                            const llvm::APSInt &VLENVal,
10306                            ArrayRef<ParamAttrTy> ParamAttrs,
10307                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10308   struct ISADataTy {
10309     char ISA;
10310     unsigned VecRegSize;
10311   };
10312   ISADataTy ISAData[] = {
10313       {
10314           'b', 128
10315       }, // SSE
10316       {
10317           'c', 256
10318       }, // AVX
10319       {
10320           'd', 256
10321       }, // AVX2
10322       {
10323           'e', 512
10324       }, // AVX512
10325   };
10326   llvm::SmallVector<char, 2> Masked;
10327   switch (State) {
10328   case OMPDeclareSimdDeclAttr::BS_Undefined:
10329     Masked.push_back('N');
10330     Masked.push_back('M');
10331     break;
10332   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10333     Masked.push_back('N');
10334     break;
10335   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10336     Masked.push_back('M');
10337     break;
10338   }
10339   for (char Mask : Masked) {
10340     for (const ISADataTy &Data : ISAData) {
10341       SmallString<256> Buffer;
10342       llvm::raw_svector_ostream Out(Buffer);
10343       Out << "_ZGV" << Data.ISA << Mask;
10344       if (!VLENVal) {
10345         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10346         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10347         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10348       } else {
10349         Out << VLENVal;
10350       }
10351       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10352         switch (ParamAttr.Kind){
10353         case LinearWithVarStride:
10354           Out << 's' << ParamAttr.StrideOrArg;
10355           break;
10356         case Linear:
10357           Out << 'l';
10358           if (!!ParamAttr.StrideOrArg)
10359             Out << ParamAttr.StrideOrArg;
10360           break;
10361         case Uniform:
10362           Out << 'u';
10363           break;
10364         case Vector:
10365           Out << 'v';
10366           break;
10367         }
10368         if (!!ParamAttr.Alignment)
10369           Out << 'a' << ParamAttr.Alignment;
10370       }
10371       Out << '_' << Fn->getName();
10372       Fn->addFnAttr(Out.str());
10373     }
10374   }
10375 }
10376 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10382 
10383 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10384 ///
10385 /// TODO: Need to implement the behavior for reference marked with a
10386 /// var or no linear modifiers (1.b in the section). For this, we
10387 /// need to extend ParamKindTy to support the linear modifiers.
10388 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10389   QT = QT.getCanonicalType();
10390 
10391   if (QT->isVoidType())
10392     return false;
10393 
10394   if (Kind == ParamKindTy::Uniform)
10395     return false;
10396 
10397   if (Kind == ParamKindTy::Linear)
10398     return false;
10399 
10400   // TODO: Handle linear references with modifiers
10401 
10402   if (Kind == ParamKindTy::LinearWithVarStride)
10403     return false;
10404 
10405   return true;
10406 }
10407 
10408 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10409 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10410   QT = QT.getCanonicalType();
10411   unsigned Size = C.getTypeSize(QT);
10412 
10413   // Only scalars and complex within 16 bytes wide set PVB to true.
10414   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10415     return false;
10416 
10417   if (QT->isFloatingType())
10418     return true;
10419 
10420   if (QT->isIntegerType())
10421     return true;
10422 
10423   if (QT->isPointerType())
10424     return true;
10425 
10426   // TODO: Add support for complex types (section 3.1.2, item 2).
10427 
10428   return false;
10429 }
10430 
10431 /// Computes the lane size (LS) of a return type or of an input parameter,
10432 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10433 /// TODO: Add support for references, section 3.2.1, item 1.
10434 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10435   if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10436     QualType PTy = QT.getCanonicalType()->getPointeeType();
10437     if (getAArch64PBV(PTy, C))
10438       return C.getTypeSize(PTy);
10439   }
10440   if (getAArch64PBV(QT, C))
10441     return C.getTypeSize(QT);
10442 
10443   return C.getTypeSize(C.getUIntPtrType());
10444 }
10445 
10446 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10447 // signature of the scalar function, as defined in 3.2.2 of the
10448 // AAVFABI.
10449 static std::tuple<unsigned, unsigned, bool>
10450 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10451   QualType RetType = FD->getReturnType().getCanonicalType();
10452 
10453   ASTContext &C = FD->getASTContext();
10454 
10455   bool OutputBecomesInput = false;
10456 
10457   llvm::SmallVector<unsigned, 8> Sizes;
10458   if (!RetType->isVoidType()) {
10459     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10460     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10461       OutputBecomesInput = true;
10462   }
10463   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10464     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10465     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10466   }
10467 
10468   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10469   // The LS of a function parameter / return value can only be a power
10470   // of 2, starting from 8 bits, up to 128.
10471   assert(std::all_of(Sizes.begin(), Sizes.end(),
10472                      [](unsigned Size) {
10473                        return Size == 8 || Size == 16 || Size == 32 ||
10474                               Size == 64 || Size == 128;
10475                      }) &&
10476          "Invalid size");
10477 
10478   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10479                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10480                          OutputBecomesInput);
10481 }
10482 
10483 /// Mangle the parameter part of the vector function name according to
10484 /// their OpenMP classification. The mangling function is defined in
10485 /// section 3.5 of the AAVFABI.
10486 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10487   SmallString<256> Buffer;
10488   llvm::raw_svector_ostream Out(Buffer);
10489   for (const auto &ParamAttr : ParamAttrs) {
10490     switch (ParamAttr.Kind) {
10491     case LinearWithVarStride:
10492       Out << "ls" << ParamAttr.StrideOrArg;
10493       break;
10494     case Linear:
10495       Out << 'l';
10496       // Don't print the step value if it is not present or if it is
10497       // equal to 1.
10498       if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
10499         Out << ParamAttr.StrideOrArg;
10500       break;
10501     case Uniform:
10502       Out << 'u';
10503       break;
10504     case Vector:
10505       Out << 'v';
10506       break;
10507     }
10508 
10509     if (!!ParamAttr.Alignment)
10510       Out << 'a' << ParamAttr.Alignment;
10511   }
10512 
10513   return Out.str();
10514 }
10515 
10516 // Function used to add the attribute. The parameter `VLEN` is
10517 // templated to allow the use of "x" when targeting scalable functions
10518 // for SVE.
10519 template <typename T>
10520 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10521                                  char ISA, StringRef ParSeq,
10522                                  StringRef MangledName, bool OutputBecomesInput,
10523                                  llvm::Function *Fn) {
10524   SmallString<256> Buffer;
10525   llvm::raw_svector_ostream Out(Buffer);
10526   Out << Prefix << ISA << LMask << VLEN;
10527   if (OutputBecomesInput)
10528     Out << "v";
10529   Out << ParSeq << "_" << MangledName;
10530   Fn->addFnAttr(Out.str());
10531 }
10532 
10533 // Helper function to generate the Advanced SIMD names depending on
10534 // the value of the NDS when simdlen is not present.
10535 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10536                                       StringRef Prefix, char ISA,
10537                                       StringRef ParSeq, StringRef MangledName,
10538                                       bool OutputBecomesInput,
10539                                       llvm::Function *Fn) {
10540   switch (NDS) {
10541   case 8:
10542     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10543                          OutputBecomesInput, Fn);
10544     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10545                          OutputBecomesInput, Fn);
10546     break;
10547   case 16:
10548     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10549                          OutputBecomesInput, Fn);
10550     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10551                          OutputBecomesInput, Fn);
10552     break;
10553   case 32:
10554     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10555                          OutputBecomesInput, Fn);
10556     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10557                          OutputBecomesInput, Fn);
10558     break;
10559   case 64:
10560   case 128:
10561     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10562                          OutputBecomesInput, Fn);
10563     break;
10564   default:
10565     llvm_unreachable("Scalar type is too wide.");
10566   }
10567 }
10568 
10569 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10570 static void emitAArch64DeclareSimdFunction(
10571     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10572     ArrayRef<ParamAttrTy> ParamAttrs,
10573     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10574     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10575 
10576   // Get basic data for building the vector signature.
10577   const auto Data = getNDSWDS(FD, ParamAttrs);
10578   const unsigned NDS = std::get<0>(Data);
10579   const unsigned WDS = std::get<1>(Data);
10580   const bool OutputBecomesInput = std::get<2>(Data);
10581 
10582   // Check the values provided via `simdlen` by the user.
10583   // 1. A `simdlen(1)` doesn't produce vector signatures,
10584   if (UserVLEN == 1) {
10585     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10586         DiagnosticsEngine::Warning,
10587         "The clause simdlen(1) has no effect when targeting aarch64.");
10588     CGM.getDiags().Report(SLoc, DiagID);
10589     return;
10590   }
10591 
10592   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10593   // Advanced SIMD output.
10594   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10595     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10596         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10597                                     "power of 2 when targeting Advanced SIMD.");
10598     CGM.getDiags().Report(SLoc, DiagID);
10599     return;
10600   }
10601 
10602   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10603   // limits.
10604   if (ISA == 's' && UserVLEN != 0) {
10605     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10606       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10607           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10608                                       "lanes in the architectural constraints "
10609                                       "for SVE (min is 128-bit, max is "
10610                                       "2048-bit, by steps of 128-bit)");
10611       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10612       return;
10613     }
10614   }
10615 
10616   // Sort out parameter sequence.
10617   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10618   StringRef Prefix = "_ZGV";
10619   // Generate simdlen from user input (if any).
10620   if (UserVLEN) {
10621     if (ISA == 's') {
10622       // SVE generates only a masked function.
10623       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10624                            OutputBecomesInput, Fn);
10625     } else {
10626       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10627       // Advanced SIMD generates one or two functions, depending on
10628       // the `[not]inbranch` clause.
10629       switch (State) {
10630       case OMPDeclareSimdDeclAttr::BS_Undefined:
10631         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10632                              OutputBecomesInput, Fn);
10633         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10634                              OutputBecomesInput, Fn);
10635         break;
10636       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10637         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10638                              OutputBecomesInput, Fn);
10639         break;
10640       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10641         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10642                              OutputBecomesInput, Fn);
10643         break;
10644       }
10645     }
10646   } else {
10647     // If no user simdlen is provided, follow the AAVFABI rules for
10648     // generating the vector length.
10649     if (ISA == 's') {
10650       // SVE, section 3.4.1, item 1.
10651       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10652                            OutputBecomesInput, Fn);
10653     } else {
10654       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10655       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10656       // two vector names depending on the use of the clause
10657       // `[not]inbranch`.
10658       switch (State) {
10659       case OMPDeclareSimdDeclAttr::BS_Undefined:
10660         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10661                                   OutputBecomesInput, Fn);
10662         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10663                                   OutputBecomesInput, Fn);
10664         break;
10665       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10666         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10667                                   OutputBecomesInput, Fn);
10668         break;
10669       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10670         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10671                                   OutputBecomesInput, Fn);
10672         break;
10673       }
10674     }
10675   }
10676 }
10677 
10678 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10679                                               llvm::Function *Fn) {
10680   ASTContext &C = CGM.getContext();
10681   FD = FD->getMostRecentDecl();
10682   // Map params to their positions in function decl.
10683   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10684   if (isa<CXXMethodDecl>(FD))
10685     ParamPositions.try_emplace(FD, 0);
10686   unsigned ParamPos = ParamPositions.size();
10687   for (const ParmVarDecl *P : FD->parameters()) {
10688     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10689     ++ParamPos;
10690   }
10691   while (FD) {
10692     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10693       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10694       // Mark uniform parameters.
10695       for (const Expr *E : Attr->uniforms()) {
10696         E = E->IgnoreParenImpCasts();
10697         unsigned Pos;
10698         if (isa<CXXThisExpr>(E)) {
10699           Pos = ParamPositions[FD];
10700         } else {
10701           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10702                                 ->getCanonicalDecl();
10703           Pos = ParamPositions[PVD];
10704         }
10705         ParamAttrs[Pos].Kind = Uniform;
10706       }
10707       // Get alignment info.
10708       auto NI = Attr->alignments_begin();
10709       for (const Expr *E : Attr->aligneds()) {
10710         E = E->IgnoreParenImpCasts();
10711         unsigned Pos;
10712         QualType ParmTy;
10713         if (isa<CXXThisExpr>(E)) {
10714           Pos = ParamPositions[FD];
10715           ParmTy = E->getType();
10716         } else {
10717           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10718                                 ->getCanonicalDecl();
10719           Pos = ParamPositions[PVD];
10720           ParmTy = PVD->getType();
10721         }
10722         ParamAttrs[Pos].Alignment =
10723             (*NI)
10724                 ? (*NI)->EvaluateKnownConstInt(C)
10725                 : llvm::APSInt::getUnsigned(
10726                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10727                           .getQuantity());
10728         ++NI;
10729       }
10730       // Mark linear parameters.
10731       auto SI = Attr->steps_begin();
10732       auto MI = Attr->modifiers_begin();
10733       for (const Expr *E : Attr->linears()) {
10734         E = E->IgnoreParenImpCasts();
10735         unsigned Pos;
10736         if (isa<CXXThisExpr>(E)) {
10737           Pos = ParamPositions[FD];
10738         } else {
10739           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10740                                 ->getCanonicalDecl();
10741           Pos = ParamPositions[PVD];
10742         }
10743         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10744         ParamAttr.Kind = Linear;
10745         if (*SI) {
10746           Expr::EvalResult Result;
10747           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10748             if (const auto *DRE =
10749                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10750               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10751                 ParamAttr.Kind = LinearWithVarStride;
10752                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10753                     ParamPositions[StridePVD->getCanonicalDecl()]);
10754               }
10755             }
10756           } else {
10757             ParamAttr.StrideOrArg = Result.Val.getInt();
10758           }
10759         }
10760         ++SI;
10761         ++MI;
10762       }
10763       llvm::APSInt VLENVal;
10764       SourceLocation ExprLoc;
10765       const Expr *VLENExpr = Attr->getSimdlen();
10766       if (VLENExpr) {
10767         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10768         ExprLoc = VLENExpr->getExprLoc();
10769       }
10770       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10771       if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
10772           CGM.getTriple().getArch() == llvm::Triple::x86_64) {
10773         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10774       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10775         unsigned VLEN = VLENVal.getExtValue();
10776         StringRef MangledName = Fn->getName();
10777         if (CGM.getTarget().hasFeature("sve"))
10778           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10779                                          MangledName, 's', 128, Fn, ExprLoc);
10780         if (CGM.getTarget().hasFeature("neon"))
10781           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10782                                          MangledName, 'n', 128, Fn, ExprLoc);
10783       }
10784     }
10785     FD = FD->getPreviousDecl();
10786   }
10787 }
10788 
10789 namespace {
10790 /// Cleanup action for doacross support.
10791 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10792 public:
10793   static const int DoacrossFinArgs = 2;
10794 
10795 private:
10796   llvm::FunctionCallee RTLFn;
10797   llvm::Value *Args[DoacrossFinArgs];
10798 
10799 public:
10800   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10801                     ArrayRef<llvm::Value *> CallArgs)
10802       : RTLFn(RTLFn) {
10803     assert(CallArgs.size() == DoacrossFinArgs);
10804     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10805   }
10806   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10807     if (!CGF.HaveInsertPoint())
10808       return;
10809     CGF.EmitRuntimeCall(RTLFn, Args);
10810   }
10811 };
10812 } // namespace
10813 
10814 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
10815                                        const OMPLoopDirective &D,
10816                                        ArrayRef<Expr *> NumIterations) {
10817   if (!CGF.HaveInsertPoint())
10818     return;
10819 
10820   ASTContext &C = CGM.getContext();
10821   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
10822   RecordDecl *RD;
10823   if (KmpDimTy.isNull()) {
10824     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
10825     //  kmp_int64 lo; // lower
10826     //  kmp_int64 up; // upper
10827     //  kmp_int64 st; // stride
10828     // };
10829     RD = C.buildImplicitRecord("kmp_dim");
10830     RD->startDefinition();
10831     addFieldToRecordDecl(C, RD, Int64Ty);
10832     addFieldToRecordDecl(C, RD, Int64Ty);
10833     addFieldToRecordDecl(C, RD, Int64Ty);
10834     RD->completeDefinition();
10835     KmpDimTy = C.getRecordType(RD);
10836   } else {
10837     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
10838   }
10839   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
10840   QualType ArrayTy =
10841       C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);
10842 
10843   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
10844   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
10845   enum { LowerFD = 0, UpperFD, StrideFD };
10846   // Fill dims with data.
10847   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
10848     LValue DimsLVal = CGF.MakeAddrLValue(
10849         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
10850     // dims.upper = num_iterations;
10851     LValue UpperLVal = CGF.EmitLValueForField(
10852         DimsLVal, *std::next(RD->field_begin(), UpperFD));
10853     llvm::Value *NumIterVal =
10854         CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
10855                                  D.getNumIterations()->getType(), Int64Ty,
10856                                  D.getNumIterations()->getExprLoc());
10857     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
10858     // dims.stride = 1;
10859     LValue StrideLVal = CGF.EmitLValueForField(
10860         DimsLVal, *std::next(RD->field_begin(), StrideFD));
10861     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
10862                           StrideLVal);
10863   }
10864 
10865   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
10866   // kmp_int32 num_dims, struct kmp_dim * dims);
10867   llvm::Value *Args[] = {
10868       emitUpdateLocation(CGF, D.getBeginLoc()),
10869       getThreadID(CGF, D.getBeginLoc()),
10870       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
10871       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
10872           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
10873           CGM.VoidPtrTy)};
10874 
10875   llvm::FunctionCallee RTLFn =
10876       createRuntimeFunction(OMPRTL__kmpc_doacross_init);
10877   CGF.EmitRuntimeCall(RTLFn, Args);
10878   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
10879       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
10880   llvm::FunctionCallee FiniRTLFn =
10881       createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
10882   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
10883                                              llvm::makeArrayRef(FiniArgs));
10884 }
10885 
10886 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
10887                                           const OMPDependClause *C) {
10888   QualType Int64Ty =
10889       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
10890   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10891   QualType ArrayTy = CGM.getContext().getConstantArrayType(
10892       Int64Ty, Size, ArrayType::Normal, 0);
10893   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10894   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10895     const Expr *CounterVal = C->getLoopData(I);
10896     assert(CounterVal);
10897     llvm::Value *CntVal = CGF.EmitScalarConversion(
10898         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10899         CounterVal->getExprLoc());
10900     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
10901                           /*Volatile=*/false, Int64Ty);
10902   }
10903   llvm::Value *Args[] = {
10904       emitUpdateLocation(CGF, C->getBeginLoc()),
10905       getThreadID(CGF, C->getBeginLoc()),
10906       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
10907   llvm::FunctionCallee RTLFn;
10908   if (C->getDependencyKind() == OMPC_DEPEND_source) {
10909     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
10910   } else {
10911     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
10912     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
10913   }
10914   CGF.EmitRuntimeCall(RTLFn, Args);
10915 }
10916 
10917 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
10918                                llvm::FunctionCallee Callee,
10919                                ArrayRef<llvm::Value *> Args) const {
10920   assert(Loc.isValid() && "Outlined function call location must be valid.");
10921   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
10922 
10923   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
10924     if (Fn->doesNotThrow()) {
10925       CGF.EmitNounwindRuntimeCall(Fn, Args);
10926       return;
10927     }
10928   }
10929   CGF.EmitRuntimeCall(Callee, Args);
10930 }
10931 
/// Emits a call to the outlined function \p OutlinedFn by forwarding \p CGF,
/// \p Loc, the callee and \p Args unchanged to emitCall.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
10937 
10938 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
10939   if (const auto *FD = dyn_cast<FunctionDecl>(D))
10940     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
10941       HasEmittedDeclareTargetRegion = true;
10942 }
10943 
/// Returns the address of \p NativeParam as a local variable of \p CGF.
/// \p TargetParam is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
10949 
10950 namespace {
10951 /// Cleanup action for allocate support.
10952 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
10953 public:
10954   static const int CleanupArgs = 3;
10955 
10956 private:
10957   llvm::FunctionCallee RTLFn;
10958   llvm::Value *Args[CleanupArgs];
10959 
10960 public:
10961   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
10962                        ArrayRef<llvm::Value *> CallArgs)
10963       : RTLFn(RTLFn) {
10964     assert(CallArgs.size() == CleanupArgs &&
10965            "Size of arguments does not match.");
10966     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10967   }
10968   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10969     if (!CGF.HaveInsertPoint())
10970       return;
10971     CGF.EmitRuntimeCall(RTLFn, Args);
10972   }
10973 };
10974 } // namespace
10975 
10976 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
10977                                                    const VarDecl *VD) {
10978   if (!VD)
10979     return Address::invalid();
10980   const VarDecl *CVD = VD->getCanonicalDecl();
10981   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
10982     return Address::invalid();
10983   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
10984   // Use the default allocation.
10985   if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
10986       !AA->getAllocator())
10987     return Address::invalid();
10988   llvm::Value *Size;
10989   CharUnits Align = CGM.getContext().getDeclAlign(CVD);
10990   if (CVD->getType()->isVariablyModifiedType()) {
10991     Size = CGF.getTypeSize(CVD->getType());
10992     // Align the size: ((size + align - 1) / align) * align
10993     Size = CGF.Builder.CreateNUWAdd(
10994         Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
10995     Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
10996     Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
10997   } else {
10998     CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
10999     Size = CGM.getSize(Sz.alignTo(Align));
11000   }
11001   llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11002   assert(AA->getAllocator() &&
11003          "Expected allocator expression for non-default allocator.");
11004   llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
11005   // According to the standard, the original allocator type is a enum (integer).
11006   // Convert to pointer type, if required.
11007   if (Allocator->getType()->isIntegerTy())
11008     Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
11009   else if (Allocator->getType()->isPointerTy())
11010     Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
11011                                                                 CGM.VoidPtrTy);
11012   llvm::Value *Args[] = {ThreadID, Size, Allocator};
11013 
11014   llvm::Value *Addr =
11015       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
11016                           CVD->getName() + ".void.addr");
11017   llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
11018                                                               Allocator};
11019   llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
11020 
11021   CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11022                                                 llvm::makeArrayRef(FiniArgs));
11023   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11024       Addr,
11025       CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
11026       CVD->getName() + ".addr");
11027   return Address(Addr, Align);
11028 }
11029 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11035 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11041 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode"). \p NumberOfParts is
/// never written.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11049 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11057 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11064 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11070 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11075 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11081 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11089 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11096 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11104 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11111 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11117 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11123 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11130 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11136 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode"); no value is
/// returned.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11144 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11150 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11156 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode"); no address is
/// returned.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11163 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11169 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11174 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11180 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11189 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11196 
/// Asserts that \p Options.SimpleReduction is set and then forwards every
/// argument unchanged to the base-class CGOpenMPRuntime::emitReduction.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  // Explicitly qualified call: always runs the CGOpenMPRuntime implementation.
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
11205 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11211 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11218 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11225 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11230 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11236 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11242 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode"). The output
/// parameters \p OutlinedFn and \p OutlinedFnID are never written.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11249 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         llvm::Function *OutlinedFn,
                                         llvm::Value *OutlinedFnID,
                                         const Expr *IfCond,
                                         const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11258 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode"); no value is
/// returned.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11262 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode"); no value is
/// returned.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11266 
/// Always returns false without inspecting \p GD or doing any other work.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
11270 
/// Always returns nullptr; no function is created.
llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
  return nullptr;
}
11274 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11282 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11289 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode"). \p Info is never
/// written.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11295 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11301 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11307 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode").
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11312 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode"); no declaration is
/// returned.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11318 
/// Not implemented by CGOpenMPSIMDRuntime: the body is an unconditional
/// llvm_unreachable ("Not supported in SIMD-only mode"); no address is
/// returned.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11325