1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCXXABI.h"
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/CodeGen/ConstantInitBuilder.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/Basic/BitmaskEnum.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/Format.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 
31 using namespace clang;
32 using namespace CodeGen;
33 
34 namespace {
35 /// Base class for handling code generation inside OpenMP regions.
36 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
37 public:
38   /// Kinds of OpenMP regions used in codegen.
39   enum CGOpenMPRegionKind {
40     /// Region with outlined function for standalone 'parallel'
41     /// directive.
42     ParallelOutlinedRegion,
43     /// Region with outlined function for standalone 'task' directive.
44     TaskOutlinedRegion,
45     /// Region for constructs that do not require function outlining,
46     /// like 'for', 'sections', 'atomic' etc. directives.
47     InlinedRegion,
48     /// Region with outlined function for standalone 'target' directive.
49     TargetRegion,
50   };
51 
52   CGOpenMPRegionInfo(const CapturedStmt &CS,
53                      const CGOpenMPRegionKind RegionKind,
54                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
55                      bool HasCancel)
56       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
57         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
58 
59   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
60                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
61                      bool HasCancel)
62       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
63         Kind(Kind), HasCancel(HasCancel) {}
64 
65   /// Get a variable or parameter for storing global thread id
66   /// inside OpenMP construct.
67   virtual const VarDecl *getThreadIDVariable() const = 0;
68 
69   /// Emit the captured statement body.
70   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
71 
72   /// Get an LValue for the current ThreadID variable.
73   /// \return LValue for thread id variable. This LValue always has type int32*.
74   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
75 
76   virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
77 
78   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
79 
80   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
81 
82   bool hasCancel() const { return HasCancel; }
83 
84   static bool classof(const CGCapturedStmtInfo *Info) {
85     return Info->getKind() == CR_OpenMP;
86   }
87 
88   ~CGOpenMPRegionInfo() override = default;
89 
90 protected:
91   CGOpenMPRegionKind RegionKind;
92   RegionCodeGenTy CodeGen;
93   OpenMPDirectiveKind Kind;
94   bool HasCancel;
95 };
96 
97 /// API for captured statement code generation in OpenMP constructs.
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
99 public:
100   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
101                              const RegionCodeGenTy &CodeGen,
102                              OpenMPDirectiveKind Kind, bool HasCancel,
103                              StringRef HelperName)
104       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
105                            HasCancel),
106         ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
107     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
108   }
109 
110   /// Get a variable or parameter for storing global thread id
111   /// inside OpenMP construct.
112   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
113 
114   /// Get the name of the capture helper.
115   StringRef getHelperName() const override { return HelperName; }
116 
117   static bool classof(const CGCapturedStmtInfo *Info) {
118     return CGOpenMPRegionInfo::classof(Info) &&
119            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
120                ParallelOutlinedRegion;
121   }
122 
123 private:
124   /// A variable or parameter storing global thread id for OpenMP
125   /// constructs.
126   const VarDecl *ThreadIDVar;
127   StringRef HelperName;
128 };
129 
130 /// API for captured statement code generation in OpenMP constructs.
131 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
132 public:
133   class UntiedTaskActionTy final : public PrePostActionTy {
134     bool Untied;
135     const VarDecl *PartIDVar;
136     const RegionCodeGenTy UntiedCodeGen;
137     llvm::SwitchInst *UntiedSwitch = nullptr;
138 
139   public:
140     UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
141                        const RegionCodeGenTy &UntiedCodeGen)
142         : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
143     void Enter(CodeGenFunction &CGF) override {
144       if (Untied) {
145         // Emit task switching point.
146         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
147             CGF.GetAddrOfLocalVar(PartIDVar),
148             PartIDVar->getType()->castAs<PointerType>());
149         llvm::Value *Res =
150             CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
151         llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
152         UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
153         CGF.EmitBlock(DoneBB);
154         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
155         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
156         UntiedSwitch->addCase(CGF.Builder.getInt32(0),
157                               CGF.Builder.GetInsertBlock());
158         emitUntiedSwitch(CGF);
159       }
160     }
161     void emitUntiedSwitch(CodeGenFunction &CGF) const {
162       if (Untied) {
163         LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
164             CGF.GetAddrOfLocalVar(PartIDVar),
165             PartIDVar->getType()->castAs<PointerType>());
166         CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
167                               PartIdLVal);
168         UntiedCodeGen(CGF);
169         CodeGenFunction::JumpDest CurPoint =
170             CGF.getJumpDestInCurrentScope(".untied.next.");
171         CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
172         CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
173         UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
174                               CGF.Builder.GetInsertBlock());
175         CGF.EmitBranchThroughCleanup(CurPoint);
176         CGF.EmitBlock(CurPoint.getBlock());
177       }
178     }
179     unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
180   };
181   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
182                                  const VarDecl *ThreadIDVar,
183                                  const RegionCodeGenTy &CodeGen,
184                                  OpenMPDirectiveKind Kind, bool HasCancel,
185                                  const UntiedTaskActionTy &Action)
186       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
187         ThreadIDVar(ThreadIDVar), Action(Action) {
188     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
189   }
190 
191   /// Get a variable or parameter for storing global thread id
192   /// inside OpenMP construct.
193   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
194 
195   /// Get an LValue for the current ThreadID variable.
196   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
197 
198   /// Get the name of the capture helper.
199   StringRef getHelperName() const override { return ".omp_outlined."; }
200 
201   void emitUntiedSwitch(CodeGenFunction &CGF) override {
202     Action.emitUntiedSwitch(CGF);
203   }
204 
205   static bool classof(const CGCapturedStmtInfo *Info) {
206     return CGOpenMPRegionInfo::classof(Info) &&
207            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
208                TaskOutlinedRegion;
209   }
210 
211 private:
212   /// A variable or parameter storing global thread id for OpenMP
213   /// constructs.
214   const VarDecl *ThreadIDVar;
215   /// Action for emitting code for untied tasks.
216   const UntiedTaskActionTy &Action;
217 };
218 
219 /// API for inlined captured statement code generation in OpenMP
220 /// constructs.
221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
222 public:
223   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
224                             const RegionCodeGenTy &CodeGen,
225                             OpenMPDirectiveKind Kind, bool HasCancel)
226       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
227         OldCSI(OldCSI),
228         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
229 
230   // Retrieve the value of the context parameter.
231   llvm::Value *getContextValue() const override {
232     if (OuterRegionInfo)
233       return OuterRegionInfo->getContextValue();
234     llvm_unreachable("No context value for inlined OpenMP region");
235   }
236 
237   void setContextValue(llvm::Value *V) override {
238     if (OuterRegionInfo) {
239       OuterRegionInfo->setContextValue(V);
240       return;
241     }
242     llvm_unreachable("No context value for inlined OpenMP region");
243   }
244 
245   /// Lookup the captured field decl for a variable.
246   const FieldDecl *lookup(const VarDecl *VD) const override {
247     if (OuterRegionInfo)
248       return OuterRegionInfo->lookup(VD);
249     // If there is no outer outlined region,no need to lookup in a list of
250     // captured variables, we can use the original one.
251     return nullptr;
252   }
253 
254   FieldDecl *getThisFieldDecl() const override {
255     if (OuterRegionInfo)
256       return OuterRegionInfo->getThisFieldDecl();
257     return nullptr;
258   }
259 
260   /// Get a variable or parameter for storing global thread id
261   /// inside OpenMP construct.
262   const VarDecl *getThreadIDVariable() const override {
263     if (OuterRegionInfo)
264       return OuterRegionInfo->getThreadIDVariable();
265     return nullptr;
266   }
267 
268   /// Get an LValue for the current ThreadID variable.
269   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
270     if (OuterRegionInfo)
271       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
272     llvm_unreachable("No LValue for inlined OpenMP construct");
273   }
274 
275   /// Get the name of the capture helper.
276   StringRef getHelperName() const override {
277     if (auto *OuterRegionInfo = getOldCSI())
278       return OuterRegionInfo->getHelperName();
279     llvm_unreachable("No helper name for inlined OpenMP construct");
280   }
281 
282   void emitUntiedSwitch(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       OuterRegionInfo->emitUntiedSwitch(CGF);
285   }
286 
287   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
288 
289   static bool classof(const CGCapturedStmtInfo *Info) {
290     return CGOpenMPRegionInfo::classof(Info) &&
291            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
292   }
293 
294   ~CGOpenMPInlinedRegionInfo() override = default;
295 
296 private:
297   /// CodeGen info about outer OpenMP region.
298   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
299   CGOpenMPRegionInfo *OuterRegionInfo;
300 };
301 
302 /// API for captured statement code generation in OpenMP target
303 /// constructs. For this captures, implicit parameters are used instead of the
304 /// captured fields. The name of the target region has to be unique in a given
305 /// application so it is provided by the client, because only the client has
306 /// the information to generate that.
307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
308 public:
309   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
310                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
311       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
312                            /*HasCancel=*/false),
313         HelperName(HelperName) {}
314 
315   /// This is unused for target regions because each starts executing
316   /// with a single thread.
317   const VarDecl *getThreadIDVariable() const override { return nullptr; }
318 
319   /// Get the name of the capture helper.
320   StringRef getHelperName() const override { return HelperName; }
321 
322   static bool classof(const CGCapturedStmtInfo *Info) {
323     return CGOpenMPRegionInfo::classof(Info) &&
324            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
325   }
326 
327 private:
328   StringRef HelperName;
329 };
330 
331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
332   llvm_unreachable("No codegen for expressions");
333 }
334 /// API for generation of expressions captured in a innermost OpenMP
335 /// region.
336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
337 public:
338   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
339       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
340                                   OMPD_unknown,
341                                   /*HasCancel=*/false),
342         PrivScope(CGF) {
343     // Make sure the globals captured in the provided statement are local by
344     // using the privatization logic. We assume the same variable is not
345     // captured more than once.
346     for (const auto &C : CS.captures()) {
347       if (!C.capturesVariable() && !C.capturesVariableByCopy())
348         continue;
349 
350       const VarDecl *VD = C.getCapturedVar();
351       if (VD->isLocalVarDeclOrParm())
352         continue;
353 
354       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
355                       /*RefersToEnclosingVariableOrCapture=*/false,
356                       VD->getType().getNonReferenceType(), VK_LValue,
357                       C.getLocation());
358       PrivScope.addPrivate(
359           VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
360     }
361     (void)PrivScope.Privatize();
362   }
363 
364   /// Lookup the captured field decl for a variable.
365   const FieldDecl *lookup(const VarDecl *VD) const override {
366     if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
367       return FD;
368     return nullptr;
369   }
370 
371   /// Emit the captured statement body.
372   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
373     llvm_unreachable("No body for expressions");
374   }
375 
376   /// Get a variable or parameter for storing global thread id
377   /// inside OpenMP construct.
378   const VarDecl *getThreadIDVariable() const override {
379     llvm_unreachable("No thread id for expressions");
380   }
381 
382   /// Get the name of the capture helper.
383   StringRef getHelperName() const override {
384     llvm_unreachable("No helper name for expressions");
385   }
386 
387   static bool classof(const CGCapturedStmtInfo *Info) { return false; }
388 
389 private:
390   /// Private scope to capture global variables.
391   CodeGenFunction::OMPPrivateScope PrivScope;
392 };
393 
394 /// RAII for emitting code of OpenMP constructs.
395 class InlinedOpenMPRegionRAII {
396   CodeGenFunction &CGF;
397   llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
398   FieldDecl *LambdaThisCaptureField = nullptr;
399   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
400 
401 public:
402   /// Constructs region for combined constructs.
403   /// \param CodeGen Code generation sequence for combined directives. Includes
404   /// a list of functions used for code generation of implicitly inlined
405   /// regions.
406   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
407                           OpenMPDirectiveKind Kind, bool HasCancel)
408       : CGF(CGF) {
409     // Start emission for the construct.
410     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
411         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
412     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
413     LambdaThisCaptureField = CGF.LambdaThisCaptureField;
414     CGF.LambdaThisCaptureField = nullptr;
415     BlockInfo = CGF.BlockInfo;
416     CGF.BlockInfo = nullptr;
417   }
418 
419   ~InlinedOpenMPRegionRAII() {
420     // Restore original CapturedStmtInfo only if we're done with code emission.
421     auto *OldCSI =
422         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
423     delete CGF.CapturedStmtInfo;
424     CGF.CapturedStmtInfo = OldCSI;
425     std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
426     CGF.LambdaThisCaptureField = LambdaThisCaptureField;
427     CGF.BlockInfo = BlockInfo;
428   }
429 };
430 
431 /// Values for bit flags used in the ident_t to describe the fields.
432 /// All enumeric elements are named and described in accordance with the code
433 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
434 enum OpenMPLocationFlags : unsigned {
435   /// Use trampoline for internal microtask.
436   OMP_IDENT_IMD = 0x01,
437   /// Use c-style ident structure.
438   OMP_IDENT_KMPC = 0x02,
439   /// Atomic reduction option for kmpc_reduce.
440   OMP_ATOMIC_REDUCE = 0x10,
441   /// Explicit 'barrier' directive.
442   OMP_IDENT_BARRIER_EXPL = 0x20,
443   /// Implicit barrier in code.
444   OMP_IDENT_BARRIER_IMPL = 0x40,
445   /// Implicit barrier in 'for' directive.
446   OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
447   /// Implicit barrier in 'sections' directive.
448   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
449   /// Implicit barrier in 'single' directive.
450   OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
451   /// Call of __kmp_for_static_init for static loop.
452   OMP_IDENT_WORK_LOOP = 0x200,
453   /// Call of __kmp_for_static_init for sections.
454   OMP_IDENT_WORK_SECTIONS = 0x400,
455   /// Call of __kmp_for_static_init for distribute.
456   OMP_IDENT_WORK_DISTRIBUTE = 0x800,
457   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
458 };
459 
460 namespace {
461 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
462 /// Values for bit flags for marking which requires clauses have been used.
463 enum OpenMPOffloadingRequiresDirFlags : int64_t {
464   /// flag undefined.
465   OMP_REQ_UNDEFINED               = 0x000,
466   /// no requires clause present.
467   OMP_REQ_NONE                    = 0x001,
468   /// reverse_offload clause.
469   OMP_REQ_REVERSE_OFFLOAD         = 0x002,
470   /// unified_address clause.
471   OMP_REQ_UNIFIED_ADDRESS         = 0x004,
472   /// unified_shared_memory clause.
473   OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
474   /// dynamic_allocators clause.
475   OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
476   LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
477 };
478 
479 enum OpenMPOffloadingReservedDeviceIDs {
480   /// Device ID if the device was not defined, runtime should get it
481   /// from environment variables in the spec.
482   OMP_DEVICEID_UNDEF = -1,
483 };
484 } // anonymous namespace
485 
/// Indices of the fields of the ident structure, which describes a source
/// location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// reserved_1: might be used in Fortran.
  IdentField_Reserved_1 = 0,
  /// flags: OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// reserved_2: not really used in Fortran any more.
  IdentField_Reserved_2 = 2,
  /// reserved_3: source[4] in Fortran, do not use for C++.
  IdentField_Reserved_3 = 3,
  /// psource: string describing the source location. It is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
526 
/// Schedule kinds for 'omp for' loops. The enumerators are taken from the
/// enum sched_type in kmp.h.
enum OpenMPSchedType {
  // Default (unordered) versions.
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// The default schedule is the unordered static one.
  OMP_sch_default = OMP_sch_static,

  // 'ordered' versions.
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  // dist_schedule types.
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,

  // Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
558 
559 enum OpenMPRTLFunction {
560   /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
561   /// kmpc_micro microtask, ...);
562   OMPRTL__kmpc_fork_call,
563   /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
564   /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
565   OMPRTL__kmpc_threadprivate_cached,
566   /// Call to void __kmpc_threadprivate_register( ident_t *,
567   /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
568   OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
570   OMPRTL__kmpc_global_thread_num,
571   // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
572   // kmp_critical_name *crit);
573   OMPRTL__kmpc_critical,
574   // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
575   // global_tid, kmp_critical_name *crit, uintptr_t hint);
576   OMPRTL__kmpc_critical_with_hint,
577   // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
578   // kmp_critical_name *crit);
579   OMPRTL__kmpc_end_critical,
580   // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
581   // global_tid);
582   OMPRTL__kmpc_cancel_barrier,
583   // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
584   OMPRTL__kmpc_barrier,
585   // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
586   OMPRTL__kmpc_for_static_fini,
587   // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
588   // global_tid);
589   OMPRTL__kmpc_serialized_parallel,
590   // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
591   // global_tid);
592   OMPRTL__kmpc_end_serialized_parallel,
593   // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
594   // kmp_int32 num_threads);
595   OMPRTL__kmpc_push_num_threads,
596   // Call to void __kmpc_flush(ident_t *loc);
597   OMPRTL__kmpc_flush,
598   // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
599   OMPRTL__kmpc_master,
600   // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
601   OMPRTL__kmpc_end_master,
602   // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
603   // int end_part);
604   OMPRTL__kmpc_omp_taskyield,
605   // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
606   OMPRTL__kmpc_single,
607   // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
608   OMPRTL__kmpc_end_single,
609   // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
610   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
611   // kmp_routine_entry_t *task_entry);
612   OMPRTL__kmpc_omp_task_alloc,
613   // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
614   // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
615   // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
616   // kmp_int64 device_id);
617   OMPRTL__kmpc_omp_target_task_alloc,
618   // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
619   // new_task);
620   OMPRTL__kmpc_omp_task,
621   // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
622   // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
623   // kmp_int32 didit);
624   OMPRTL__kmpc_copyprivate,
625   // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
626   // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
627   // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
628   OMPRTL__kmpc_reduce,
629   // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
630   // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
631   // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
632   // *lck);
633   OMPRTL__kmpc_reduce_nowait,
634   // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
635   // kmp_critical_name *lck);
636   OMPRTL__kmpc_end_reduce,
637   // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
638   // kmp_critical_name *lck);
639   OMPRTL__kmpc_end_reduce_nowait,
640   // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
641   // kmp_task_t * new_task);
642   OMPRTL__kmpc_omp_task_begin_if0,
643   // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
644   // kmp_task_t * new_task);
645   OMPRTL__kmpc_omp_task_complete_if0,
646   // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
647   OMPRTL__kmpc_ordered,
648   // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
649   OMPRTL__kmpc_end_ordered,
650   // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
651   // global_tid);
652   OMPRTL__kmpc_omp_taskwait,
653   // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
654   OMPRTL__kmpc_taskgroup,
655   // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
656   OMPRTL__kmpc_end_taskgroup,
657   // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
658   // int proc_bind);
659   OMPRTL__kmpc_push_proc_bind,
660   // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
661   // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
662   // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
663   OMPRTL__kmpc_omp_task_with_deps,
664   // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
665   // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
666   // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
667   OMPRTL__kmpc_omp_wait_deps,
668   // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
669   // global_tid, kmp_int32 cncl_kind);
670   OMPRTL__kmpc_cancellationpoint,
671   // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
672   // kmp_int32 cncl_kind);
673   OMPRTL__kmpc_cancel,
674   // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
675   // kmp_int32 num_teams, kmp_int32 thread_limit);
676   OMPRTL__kmpc_push_num_teams,
677   // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
678   // microtask, ...);
679   OMPRTL__kmpc_fork_teams,
680   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
681   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
682   // sched, kmp_uint64 grainsize, void *task_dup);
683   OMPRTL__kmpc_taskloop,
684   // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
685   // num_dims, struct kmp_dim *dims);
686   OMPRTL__kmpc_doacross_init,
687   // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
688   OMPRTL__kmpc_doacross_fini,
689   // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
690   // *vec);
691   OMPRTL__kmpc_doacross_post,
692   // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
693   // *vec);
694   OMPRTL__kmpc_doacross_wait,
695   // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
696   // *data);
697   OMPRTL__kmpc_task_reduction_init,
698   // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
699   // *d);
700   OMPRTL__kmpc_task_reduction_get_th_data,
701   // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
702   OMPRTL__kmpc_alloc,
703   // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
704   OMPRTL__kmpc_free,
705 
706   //
707   // Offloading related calls
708   //
709   // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
710   // size);
711   OMPRTL__kmpc_push_target_tripcount,
712   // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
713   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
714   // *arg_types);
715   OMPRTL__tgt_target,
716   // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
717   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
718   // *arg_types);
719   OMPRTL__tgt_target_nowait,
720   // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
721   // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
722   // *arg_types, int32_t num_teams, int32_t thread_limit);
723   OMPRTL__tgt_target_teams,
724   // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
725   // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
726   // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
727   OMPRTL__tgt_target_teams_nowait,
728   // Call to void __tgt_register_requires(int64_t flags);
729   OMPRTL__tgt_register_requires,
730   // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
731   OMPRTL__tgt_register_lib,
732   // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
733   OMPRTL__tgt_unregister_lib,
734   // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
735   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
736   OMPRTL__tgt_target_data_begin,
737   // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
738   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
739   // *arg_types);
740   OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
743   OMPRTL__tgt_target_data_end,
744   // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
745   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
746   // *arg_types);
747   OMPRTL__tgt_target_data_end_nowait,
748   // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
749   // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
750   OMPRTL__tgt_target_data_update,
751   // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
752   // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
753   // *arg_types);
754   OMPRTL__tgt_target_data_update_nowait,
755   // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
756   OMPRTL__tgt_mapper_num_components,
757   // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
758   // *base, void *begin, int64_t size, int64_t type);
759   OMPRTL__tgt_push_mapper_component,
760 };
761 
762 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
763 /// region.
764 class CleanupTy final : public EHScopeStack::Cleanup {
765   PrePostActionTy *Action;
766 
767 public:
768   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
769   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
770     if (!CGF.HaveInsertPoint())
771       return;
772     Action->Exit(CGF);
773   }
774 };
775 
776 } // anonymous namespace
777 
778 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
779   CodeGenFunction::RunCleanupsScope Scope(CGF);
780   if (PrePostAction) {
781     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
782     Callback(CodeGen, CGF, *PrePostAction);
783   } else {
784     PrePostActionTy Action;
785     Callback(CodeGen, CGF, Action);
786   }
787 }
788 
789 /// Check if the combiner is a call to UDR combiner and if it is so return the
790 /// UDR decl used for reduction.
791 static const OMPDeclareReductionDecl *
792 getReductionInit(const Expr *ReductionOp) {
793   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
794     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
795       if (const auto *DRE =
796               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
797         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
798           return DRD;
799   return nullptr;
800 }
801 
802 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
803                                              const OMPDeclareReductionDecl *DRD,
804                                              const Expr *InitOp,
805                                              Address Private, Address Original,
806                                              QualType Ty) {
807   if (DRD->getInitializer()) {
808     std::pair<llvm::Function *, llvm::Function *> Reduction =
809         CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
810     const auto *CE = cast<CallExpr>(InitOp);
811     const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
812     const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
813     const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
814     const auto *LHSDRE =
815         cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
816     const auto *RHSDRE =
817         cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
818     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
819     PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
820                             [=]() { return Private; });
821     PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
822                             [=]() { return Original; });
823     (void)PrivateScope.Privatize();
824     RValue Func = RValue::get(Reduction.second);
825     CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
826     CGF.EmitIgnoredExpr(InitOp);
827   } else {
828     llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
829     std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
830     auto *GV = new llvm::GlobalVariable(
831         CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
832         llvm::GlobalValue::PrivateLinkage, Init, Name);
833     LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
834     RValue InitRVal;
835     switch (CGF.getEvaluationKind(Ty)) {
836     case TEK_Scalar:
837       InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
838       break;
839     case TEK_Complex:
840       InitRVal =
841           RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
842       break;
843     case TEK_Aggregate:
844       InitRVal = RValue::getAggregate(LV.getAddress());
845       break;
846     }
847     OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
848     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
849     CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
850                          /*IsInitializer=*/false);
851   }
852 }
853 
/// Emit element-by-element initialization of an array.
///
/// The emitted code first checks whether the destination array is empty and,
/// if not, runs a loop that initializes one element per iteration and tests
/// for the end of the array at the bottom of the body.
/// \param CGF Function in which the initialization loop is emitted.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, every element is initialized via
/// emitInitWithReductionInitializer(); otherwise \p Init is emitted directly
/// into every element.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, may be null. When non-null,
/// \p SrcAddr is walked in lockstep with \p DestAddr.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays. emitArrayLength also
  // fills in ElementTy and updates DestAddr.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  // The source array is only touched when a declare-reduction is involved; it
  // is given the same element type as the destination.
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Compute the one-past-the-end pointer of the destination array.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is an emptiness check followed by a loop whose
  // exit test sits at the bottom of the body.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  // EntryBB is remembered as the first incoming edge of the element PHIs.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI tracking the current source element; only created when DRD is set.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  // PHI tracking the current destination element.
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the initialization of the current element inside its own cleanups
  // scope.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element.
    // NOTE(review): the IR value name below says "dest" although this is the
    // source pointer; it only affects the name of the emitted value.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the destination address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back edge comes from whichever block the body ended in.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
942 
943 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
944   return CGF.EmitOMPSharedLValue(E);
945 }
946 
947 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
948                                             const Expr *E) {
949   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
950     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
951   return LValue();
952 }
953 
954 void ReductionCodeGen::emitAggregateInitialization(
955     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
956     const OMPDeclareReductionDecl *DRD) {
957   // Emit VarDecl with copy init for arrays.
958   // Get the address of the original variable captured in current
959   // captured region.
960   const auto *PrivateVD =
961       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
962   bool EmitDeclareReductionInit =
963       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
964   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
965                        EmitDeclareReductionInit,
966                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
967                                                 : PrivateVD->getInit(),
968                        DRD, SharedLVal.getAddress());
969 }
970 
971 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
972                                    ArrayRef<const Expr *> Privates,
973                                    ArrayRef<const Expr *> ReductionOps) {
974   ClausesData.reserve(Shareds.size());
975   SharedAddresses.reserve(Shareds.size());
976   Sizes.reserve(Shareds.size());
977   BaseDecls.reserve(Shareds.size());
978   auto IPriv = Privates.begin();
979   auto IRed = ReductionOps.begin();
980   for (const Expr *Ref : Shareds) {
981     ClausesData.emplace_back(Ref, *IPriv, *IRed);
982     std::advance(IPriv, 1);
983     std::advance(IRed, 1);
984   }
985 }
986 
987 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
988   assert(SharedAddresses.size() == N &&
989          "Number of generated lvalues must be exactly N.");
990   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
991   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
992   SharedAddresses.emplace_back(First, Second);
993 }
994 
995 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
996   const auto *PrivateVD =
997       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
998   QualType PrivateType = PrivateVD->getType();
999   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
1000   if (!PrivateType->isVariablyModifiedType()) {
1001     Sizes.emplace_back(
1002         CGF.getTypeSize(
1003             SharedAddresses[N].first.getType().getNonReferenceType()),
1004         nullptr);
1005     return;
1006   }
1007   llvm::Value *Size;
1008   llvm::Value *SizeInChars;
1009   auto *ElemType =
1010       cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
1011           ->getElementType();
1012   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
1013   if (AsArraySection) {
1014     Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
1015                                      SharedAddresses[N].first.getPointer());
1016     Size = CGF.Builder.CreateNUWAdd(
1017         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
1018     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
1019   } else {
1020     SizeInChars = CGF.getTypeSize(
1021         SharedAddresses[N].first.getType().getNonReferenceType());
1022     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
1023   }
1024   Sizes.emplace_back(SizeInChars, Size);
1025   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1026       CGF,
1027       cast<OpaqueValueExpr>(
1028           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1029       RValue::get(Size));
1030   CGF.EmitVariablyModifiedType(PrivateType);
1031 }
1032 
1033 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
1034                                          llvm::Value *Size) {
1035   const auto *PrivateVD =
1036       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1037   QualType PrivateType = PrivateVD->getType();
1038   if (!PrivateType->isVariablyModifiedType()) {
1039     assert(!Size && !Sizes[N].second &&
1040            "Size should be nullptr for non-variably modified reduction "
1041            "items.");
1042     return;
1043   }
1044   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1045       CGF,
1046       cast<OpaqueValueExpr>(
1047           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1048       RValue::get(Size));
1049   CGF.EmitVariablyModifiedType(PrivateType);
1050 }
1051 
1052 void ReductionCodeGen::emitInitialization(
1053     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1054     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1055   assert(SharedAddresses.size() > N && "No variable was generated");
1056   const auto *PrivateVD =
1057       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1058   const OMPDeclareReductionDecl *DRD =
1059       getReductionInit(ClausesData[N].ReductionOp);
1060   QualType PrivateType = PrivateVD->getType();
1061   PrivateAddr = CGF.Builder.CreateElementBitCast(
1062       PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1063   QualType SharedType = SharedAddresses[N].first.getType();
1064   SharedLVal = CGF.MakeAddrLValue(
1065       CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
1066                                        CGF.ConvertTypeForMem(SharedType)),
1067       SharedType, SharedAddresses[N].first.getBaseInfo(),
1068       CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1069   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1070     emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1071   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1072     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1073                                      PrivateAddr, SharedLVal.getAddress(),
1074                                      SharedLVal.getType());
1075   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1076              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1077     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1078                          PrivateVD->getType().getQualifiers(),
1079                          /*IsInitializer=*/false);
1080   }
1081 }
1082 
1083 bool ReductionCodeGen::needCleanups(unsigned N) {
1084   const auto *PrivateVD =
1085       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1086   QualType PrivateType = PrivateVD->getType();
1087   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1088   return DTorKind != QualType::DK_none;
1089 }
1090 
1091 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1092                                     Address PrivateAddr) {
1093   const auto *PrivateVD =
1094       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1095   QualType PrivateType = PrivateVD->getType();
1096   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1097   if (needCleanups(N)) {
1098     PrivateAddr = CGF.Builder.CreateElementBitCast(
1099         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1100     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1101   }
1102 }
1103 
1104 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1105                           LValue BaseLV) {
1106   BaseTy = BaseTy.getNonReferenceType();
1107   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1108          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1109     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
1110       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
1111     } else {
1112       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
1113       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1114     }
1115     BaseTy = BaseTy->getPointeeType();
1116   }
1117   return CGF.MakeAddrLValue(
1118       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
1119                                        CGF.ConvertTypeForMem(ElTy)),
1120       BaseLV.getType(), BaseLV.getBaseInfo(),
1121       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1122 }
1123 
1124 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1125                           llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
1126                           llvm::Value *Addr) {
1127   Address Tmp = Address::invalid();
1128   Address TopTmp = Address::invalid();
1129   Address MostTopTmp = Address::invalid();
1130   BaseTy = BaseTy.getNonReferenceType();
1131   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1132          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1133     Tmp = CGF.CreateMemTemp(BaseTy);
1134     if (TopTmp.isValid())
1135       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1136     else
1137       MostTopTmp = Tmp;
1138     TopTmp = Tmp;
1139     BaseTy = BaseTy->getPointeeType();
1140   }
1141   llvm::Type *Ty = BaseLVType;
1142   if (Tmp.isValid())
1143     Ty = Tmp.getElementType();
1144   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1145   if (Tmp.isValid()) {
1146     CGF.Builder.CreateStore(Addr, Tmp);
1147     return MostTopTmp;
1148   }
1149   return Address(Addr, BaseLVAlignment);
1150 }
1151 
1152 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1153   const VarDecl *OrigVD = nullptr;
1154   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1155     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1156     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1157       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1158     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1159       Base = TempASE->getBase()->IgnoreParenImpCasts();
1160     DE = cast<DeclRefExpr>(Base);
1161     OrigVD = cast<VarDecl>(DE->getDecl());
1162   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1163     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1164     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1165       Base = TempASE->getBase()->IgnoreParenImpCasts();
1166     DE = cast<DeclRefExpr>(Base);
1167     OrigVD = cast<VarDecl>(DE->getDecl());
1168   }
1169   return OrigVD;
1170 }
1171 
1172 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1173                                                Address PrivateAddr) {
1174   const DeclRefExpr *DE;
1175   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1176     BaseDecls.emplace_back(OrigVD);
1177     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1178     LValue BaseLValue =
1179         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1180                     OriginalBaseLValue);
1181     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1182         BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
1183     llvm::Value *PrivatePointer =
1184         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1185             PrivateAddr.getPointer(),
1186             SharedAddresses[N].first.getAddress().getType());
1187     llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1188     return castToBase(CGF, OrigVD->getType(),
1189                       SharedAddresses[N].first.getType(),
1190                       OriginalBaseLValue.getAddress().getType(),
1191                       OriginalBaseLValue.getAlignment(), Ptr);
1192   }
1193   BaseDecls.emplace_back(
1194       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1195   return PrivateAddr;
1196 }
1197 
1198 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1199   const OMPDeclareReductionDecl *DRD =
1200       getReductionInit(ClausesData[N].ReductionOp);
1201   return DRD && DRD->getInitializer();
1202 }
1203 
1204 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1205   return CGF.EmitLoadOfPointerLValue(
1206       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1207       getThreadIDVariable()->getType()->castAs<PointerType>());
1208 }
1209 
1210 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1211   if (!CGF.HaveInsertPoint())
1212     return;
1213   // 1.2.2 OpenMP Language Terminology
1214   // Structured block - An executable statement with a single entry at the
1215   // top and a single exit at the bottom.
1216   // The point of exit cannot be a branch out of the structured block.
1217   // longjmp() and throw() must not violate the entry/exit criteria.
1218   CGF.EHStack.pushTerminate();
1219   CodeGen(CGF);
1220   CGF.EHStack.popTerminate();
1221 }
1222 
1223 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1224     CodeGenFunction &CGF) {
1225   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1226                             getThreadIDVariable()->getType(),
1227                             AlignmentSource::Decl);
1228 }
1229 
1230 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1231                                        QualType FieldTy) {
1232   auto *Field = FieldDecl::Create(
1233       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1234       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1235       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1236   Field->setAccess(AS_public);
1237   DC->addDecl(Field);
1238   return Field;
1239 }
1240 
1241 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1242                                  StringRef Separator)
1243     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1244       OffloadEntriesInfoManager(CGM) {
1245   ASTContext &C = CGM.getContext();
1246   RecordDecl *RD = C.buildImplicitRecord("ident_t");
1247   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1248   RD->startDefinition();
1249   // reserved_1
1250   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1251   // flags
1252   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1253   // reserved_2
1254   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1255   // reserved_3
1256   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1257   // psource
1258   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1259   RD->completeDefinition();
1260   IdentQTy = C.getRecordType(RD);
1261   IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1262   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1263 
1264   loadOffloadInfoMetadata();
1265 }
1266 
1267 void CGOpenMPRuntime::clear() {
1268   InternalVars.clear();
1269   // Clean non-target variable declarations possibly used only in debug info.
1270   for (const auto &Data : EmittedNonTargetVariables) {
1271     if (!Data.getValue().pointsToAliveValue())
1272       continue;
1273     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1274     if (!GV)
1275       continue;
1276     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1277       continue;
1278     GV->eraseFromParent();
1279   }
1280 }
1281 
1282 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1283   SmallString<128> Buffer;
1284   llvm::raw_svector_ostream OS(Buffer);
1285   StringRef Sep = FirstSeparator;
1286   for (StringRef Part : Parts) {
1287     OS << Sep << Part;
1288     Sep = Separator;
1289   }
1290   return OS.str();
1291 }
1292 
/// Emit the helper function implementing the combiner or the initializer of a
/// user-defined reduction.
/// \param CGM Module the function is added to.
/// \param Ty Reduction type; both function parameters are restrict-qualified
/// pointers to it.
/// \param CombinerInitializer Expression emitted as the function body after
/// the variables are remapped; may be null, in which case nothing is emitted
/// for it.
/// \param In Variable mapped to the pointee of the second ("in") parameter.
/// \param Out Variable mapped to the pointee of the first ("out") parameter.
/// \param IsCombiner Selects the "omp_combiner" or "omp_initializer" name; for
/// the initializer, a non-trivial initializer of \p Out is emitted into its
/// storage first.
/// \return The emitted internal-linkage function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  // Parameter order is fixed here: "out" first, then "in".
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, replace NoInline/OptimizeNone with AlwaysInline.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  // The privatized address of In is the pointee of the "in" parameter.
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  // The privatized address of Out is the pointee of the "out" parameter.
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // Initializer only: emit a non-trivial initializer of Out into its (now
  // remapped) storage before the initializer expression itself.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  // Run the scope's cleanups explicitly before the function is finished.
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1349 
1350 void CGOpenMPRuntime::emitUserDefinedReduction(
1351     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1352   if (UDRMap.count(D) > 0)
1353     return;
1354   llvm::Function *Combiner = emitCombinerOrInitializer(
1355       CGM, D->getType(), D->getCombiner(),
1356       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1357       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1358       /*IsCombiner=*/true);
1359   llvm::Function *Initializer = nullptr;
1360   if (const Expr *Init = D->getInitializer()) {
1361     Initializer = emitCombinerOrInitializer(
1362         CGM, D->getType(),
1363         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1364                                                                      : nullptr,
1365         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1366         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1367         /*IsCombiner=*/false);
1368   }
1369   UDRMap.try_emplace(D, Combiner, Initializer);
1370   if (CGF) {
1371     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1372     Decls.second.push_back(D);
1373   }
1374 }
1375 
1376 std::pair<llvm::Function *, llvm::Function *>
1377 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1378   auto I = UDRMap.find(D);
1379   if (I != UDRMap.end())
1380     return I->second;
1381   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1382   return UDRMap.lookup(D);
1383 }
1384 
1385 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1386     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1387     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1388     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1389   assert(ThreadIDVar->getType()->isPointerType() &&
1390          "thread id variable must be of type kmp_int32 *");
1391   CodeGenFunction CGF(CGM, true);
1392   bool HasCancel = false;
1393   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1394     HasCancel = OPD->hasCancel();
1395   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1396     HasCancel = OPSD->hasCancel();
1397   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1398     HasCancel = OPFD->hasCancel();
1399   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1400     HasCancel = OPFD->hasCancel();
1401   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1402     HasCancel = OPFD->hasCancel();
1403   else if (const auto *OPFD =
1404                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1405     HasCancel = OPFD->hasCancel();
1406   else if (const auto *OPFD =
1407                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1408     HasCancel = OPFD->hasCancel();
1409   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1410                                     HasCancel, OutlinedHelperName);
1411   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1412   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1413 }
1414 
1415 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1416     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1417     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1418   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1419   return emitParallelOrTeamsOutlinedFunction(
1420       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1421 }
1422 
1423 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1424     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1425     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1426   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1427   return emitParallelOrTeamsOutlinedFunction(
1428       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1429 }
1430 
1431 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1432     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1433     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1434     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1435     bool Tied, unsigned &NumberOfParts) {
1436   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1437                                               PrePostActionTy &) {
1438     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1439     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1440     llvm::Value *TaskArgs[] = {
1441         UpLoc, ThreadID,
1442         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1443                                     TaskTVar->getType()->castAs<PointerType>())
1444             .getPointer()};
1445     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1446   };
1447   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1448                                                             UntiedCodeGen);
1449   CodeGen.setAction(Action);
1450   assert(!ThreadIDVar->getType()->isPointerType() &&
1451          "thread id variable must be of type kmp_int32 for tasks");
1452   const OpenMPDirectiveKind Region =
1453       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1454                                                       : OMPD_task;
1455   const CapturedStmt *CS = D.getCapturedStmt(Region);
1456   const auto *TD = dyn_cast<OMPTaskDirective>(&D);
1457   CodeGenFunction CGF(CGM, true);
1458   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1459                                         InnermostKind,
1460                                         TD ? TD->hasCancel() : false, Action);
1461   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1462   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1463   if (!Tied)
1464     NumberOfParts = Action.getNumberOfParts();
1465   return Res;
1466 }
1467 
1468 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1469                              const RecordDecl *RD, const CGRecordLayout &RL,
1470                              ArrayRef<llvm::Constant *> Data) {
1471   llvm::StructType *StructTy = RL.getLLVMType();
1472   unsigned PrevIdx = 0;
1473   ConstantInitBuilder CIBuilder(CGM);
1474   auto DI = Data.begin();
1475   for (const FieldDecl *FD : RD->fields()) {
1476     unsigned Idx = RL.getLLVMFieldNo(FD);
1477     // Fill the alignment.
1478     for (unsigned I = PrevIdx; I < Idx; ++I)
1479       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1480     PrevIdx = Idx + 1;
1481     Fields.add(*DI);
1482     ++DI;
1483   }
1484 }
1485 
1486 template <class... As>
1487 static llvm::GlobalVariable *
1488 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1489                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1490                    As &&... Args) {
1491   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1492   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1493   ConstantInitBuilder CIBuilder(CGM);
1494   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1495   buildStructValue(Fields, CGM, RD, RL, Data);
1496   return Fields.finishAndCreateGlobal(
1497       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1498       std::forward<As>(Args)...);
1499 }
1500 
1501 template <typename T>
1502 static void
1503 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1504                                          ArrayRef<llvm::Constant *> Data,
1505                                          T &Parent) {
1506   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1507   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1508   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1509   buildStructValue(Fields, CGM, RD, RL, Data);
1510   Fields.finishAndAddTo(Parent);
1511 }
1512 
1513 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1514   CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1515   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1516   FlagsTy FlagsKey(Flags, Reserved2Flags);
1517   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1518   if (!Entry) {
1519     if (!DefaultOpenMPPSource) {
1520       // Initialize default location for psource field of ident_t structure of
1521       // all ident_t objects. Format is ";file;function;line;column;;".
1522       // Taken from
1523       // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1524       DefaultOpenMPPSource =
1525           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1526       DefaultOpenMPPSource =
1527           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1528     }
1529 
1530     llvm::Constant *Data[] = {
1531         llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1532         llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1533         llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1534         llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1535     llvm::GlobalValue *DefaultOpenMPLocation =
1536         createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1537                            llvm::GlobalValue::PrivateLinkage);
1538     DefaultOpenMPLocation->setUnnamedAddr(
1539         llvm::GlobalValue::UnnamedAddr::Global);
1540 
1541     OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1542   }
1543   return Address(Entry, Align);
1544 }
1545 
1546 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1547                                              bool AtCurrentPoint) {
1548   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1549   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1550 
1551   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1552   if (AtCurrentPoint) {
1553     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1554         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1555   } else {
1556     Elem.second.ServiceInsertPt =
1557         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1558     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1559   }
1560 }
1561 
1562 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1563   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1564   if (Elem.second.ServiceInsertPt) {
1565     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1566     Elem.second.ServiceInsertPt = nullptr;
1567     Ptr->eraseFromParent();
1568   }
1569 }
1570 
/// Emits an ident_t location object describing \p Loc and returns a pointer
/// to it.
///
/// OMP_IDENT_KMPC is always OR'ed into \p Flags. If debug info is disabled or
/// \p Loc is invalid, the shared default location for those flags is
/// returned. Otherwise a per-function temporary (".kmpc_loc.addr") is used:
/// it is created on first use and initialized with a memcpy from the default
/// location, emitted at the function's service insertion point. Its psource
/// field is then overwritten, at the current insertion point, with a
/// ";file;function;line;column;;" string built from \p Loc.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  // Reuse the temporary already recorded for this function, if any.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the temporary from the default location. The copy is emitted
    // at the service insertion point; the guard restores the builder's
    // insertion point when this scope ends.
    // NOTE(review): Flags are only copied here, when the temporary is first
    // created; a later call with different Flags reuses the temporary as-is.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Location strings are cached in OpenMPDebugLocMap, keyed only by the raw
  // encoding of Loc.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    // The function part is left empty when the current declaration is not a
    // FunctionDecl.
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1631 
/// Returns a value holding the OpenMP global thread id for use in the current
/// function.
///
/// Sources, in order of preference:
///  1. the value already cached for CGF.CurFn in OpenMPLocThreadIDMap;
///  2. a load from the thread id variable of the enclosing OpenMP region, when
///     the region provides one and the exception-related condition below
///     allows it (cached only if the load is emitted in the block that
///     contains the alloca insertion point);
///  3. a call to __kmpc_global_thread_num emitted at the function's service
///     insertion point, which is then cached.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  // The parameter is still used when no landing pad is required, when either
  // exception option is off, or when we are emitting into the block that
  // contains the alloca insertion point.
  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
      !CGF.getLangOpts().CXXExceptions ||
      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    if (auto *OMPRegionInfo =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
      if (OMPRegionInfo->getThreadIDVariable()) {
        // Check whether this is an outlined function with the thread id
        // passed as an argument.
        LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insertion point; the guard restores the
  // builder's insertion point on return.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1682 
1683 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1684   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1685   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1686     clearLocThreadIdInsertPt(CGF);
1687     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1688   }
1689   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1690     for(auto *D : FunctionUDRMap[CGF.CurFn])
1691       UDRMap.erase(D);
1692     FunctionUDRMap.erase(CGF.CurFn);
1693   }
1694   auto I = FunctionUDMMap.find(CGF.CurFn);
1695   if (I != FunctionUDMMap.end()) {
1696     for(auto *D : I->second)
1697       UDMMap.erase(D);
1698     FunctionUDMMap.erase(I);
1699   }
1700 }
1701 
1702 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1703   return IdentTy->getPointerTo();
1704 }
1705 
1706 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1707   if (!Kmpc_MicroTy) {
1708     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1709     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1710                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1711     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1712   }
1713   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1714 }
1715 
1716 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1717   llvm::FunctionCallee RTLFn = nullptr;
1718   switch (static_cast<OpenMPRTLFunction>(Function)) {
1719   case OMPRTL__kmpc_fork_call: {
1720     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1721     // microtask, ...);
1722     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1723                                 getKmpc_MicroPointerTy()};
1724     auto *FnTy =
1725         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1726     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1727     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1728       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1729         llvm::LLVMContext &Ctx = F->getContext();
1730         llvm::MDBuilder MDB(Ctx);
1731         // Annotate the callback behavior of the __kmpc_fork_call:
1732         //  - The callback callee is argument number 2 (microtask).
1733         //  - The first two arguments of the callback callee are unknown (-1).
1734         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1735         //    callback callee.
1736         F->addMetadata(
1737             llvm::LLVMContext::MD_callback,
1738             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1739                                         2, {-1, -1},
1740                                         /* VarArgsArePassed */ true)}));
1741       }
1742     }
1743     break;
1744   }
1745   case OMPRTL__kmpc_global_thread_num: {
1746     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1747     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1748     auto *FnTy =
1749         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1750     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1751     break;
1752   }
1753   case OMPRTL__kmpc_threadprivate_cached: {
1754     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1755     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1756     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1757                                 CGM.VoidPtrTy, CGM.SizeTy,
1758                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1759     auto *FnTy =
1760         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1761     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1762     break;
1763   }
1764   case OMPRTL__kmpc_critical: {
1765     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1766     // kmp_critical_name *crit);
1767     llvm::Type *TypeParams[] = {
1768         getIdentTyPointerTy(), CGM.Int32Ty,
1769         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1770     auto *FnTy =
1771         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1772     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1773     break;
1774   }
1775   case OMPRTL__kmpc_critical_with_hint: {
1776     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1777     // kmp_critical_name *crit, uintptr_t hint);
1778     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1779                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1780                                 CGM.IntPtrTy};
1781     auto *FnTy =
1782         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1783     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1784     break;
1785   }
1786   case OMPRTL__kmpc_threadprivate_register: {
1787     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1788     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1789     // typedef void *(*kmpc_ctor)(void *);
1790     auto *KmpcCtorTy =
1791         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1792                                 /*isVarArg*/ false)->getPointerTo();
1793     // typedef void *(*kmpc_cctor)(void *, void *);
1794     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1795     auto *KmpcCopyCtorTy =
1796         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1797                                 /*isVarArg*/ false)
1798             ->getPointerTo();
1799     // typedef void (*kmpc_dtor)(void *);
1800     auto *KmpcDtorTy =
1801         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1802             ->getPointerTo();
1803     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1804                               KmpcCopyCtorTy, KmpcDtorTy};
1805     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1806                                         /*isVarArg*/ false);
1807     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1808     break;
1809   }
1810   case OMPRTL__kmpc_end_critical: {
1811     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1812     // kmp_critical_name *crit);
1813     llvm::Type *TypeParams[] = {
1814         getIdentTyPointerTy(), CGM.Int32Ty,
1815         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1816     auto *FnTy =
1817         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1818     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1819     break;
1820   }
1821   case OMPRTL__kmpc_cancel_barrier: {
1822     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1823     // global_tid);
1824     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1825     auto *FnTy =
1826         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1827     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1828     break;
1829   }
1830   case OMPRTL__kmpc_barrier: {
1831     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1832     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1833     auto *FnTy =
1834         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1835     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1836     break;
1837   }
1838   case OMPRTL__kmpc_for_static_fini: {
1839     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1840     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1841     auto *FnTy =
1842         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1843     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1844     break;
1845   }
1846   case OMPRTL__kmpc_push_num_threads: {
1847     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1848     // kmp_int32 num_threads)
1849     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1850                                 CGM.Int32Ty};
1851     auto *FnTy =
1852         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1853     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1854     break;
1855   }
1856   case OMPRTL__kmpc_serialized_parallel: {
1857     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1858     // global_tid);
1859     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1860     auto *FnTy =
1861         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1862     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1863     break;
1864   }
1865   case OMPRTL__kmpc_end_serialized_parallel: {
1866     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1867     // global_tid);
1868     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1869     auto *FnTy =
1870         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1871     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1872     break;
1873   }
1874   case OMPRTL__kmpc_flush: {
1875     // Build void __kmpc_flush(ident_t *loc);
1876     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1877     auto *FnTy =
1878         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1879     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1880     break;
1881   }
1882   case OMPRTL__kmpc_master: {
1883     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1884     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1885     auto *FnTy =
1886         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1887     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1888     break;
1889   }
1890   case OMPRTL__kmpc_end_master: {
1891     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1892     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1893     auto *FnTy =
1894         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1895     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1896     break;
1897   }
1898   case OMPRTL__kmpc_omp_taskyield: {
1899     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1900     // int end_part);
1901     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1902     auto *FnTy =
1903         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1904     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1905     break;
1906   }
1907   case OMPRTL__kmpc_single: {
1908     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1909     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1910     auto *FnTy =
1911         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1912     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1913     break;
1914   }
1915   case OMPRTL__kmpc_end_single: {
1916     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1917     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1918     auto *FnTy =
1919         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1920     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1921     break;
1922   }
1923   case OMPRTL__kmpc_omp_task_alloc: {
1924     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1925     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1926     // kmp_routine_entry_t *task_entry);
1927     assert(KmpRoutineEntryPtrTy != nullptr &&
1928            "Type kmp_routine_entry_t must be created.");
1929     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1930                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1931     // Return void * and then cast to particular kmp_task_t type.
1932     auto *FnTy =
1933         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1934     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1935     break;
1936   }
1937   case OMPRTL__kmpc_omp_target_task_alloc: {
1938     // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
1939     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1940     // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
1941     assert(KmpRoutineEntryPtrTy != nullptr &&
1942            "Type kmp_routine_entry_t must be created.");
1943     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1944                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
1945                                 CGM.Int64Ty};
1946     // Return void * and then cast to particular kmp_task_t type.
1947     auto *FnTy =
1948         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1949     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
1950     break;
1951   }
1952   case OMPRTL__kmpc_omp_task: {
1953     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1954     // *new_task);
1955     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1956                                 CGM.VoidPtrTy};
1957     auto *FnTy =
1958         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1959     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1960     break;
1961   }
1962   case OMPRTL__kmpc_copyprivate: {
1963     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1964     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1965     // kmp_int32 didit);
1966     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1967     auto *CpyFnTy =
1968         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1969     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1970                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1971                                 CGM.Int32Ty};
1972     auto *FnTy =
1973         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1974     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1975     break;
1976   }
1977   case OMPRTL__kmpc_reduce: {
1978     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1979     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1980     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1981     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1982     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1983                                                /*isVarArg=*/false);
1984     llvm::Type *TypeParams[] = {
1985         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1986         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1987         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1988     auto *FnTy =
1989         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1990     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1991     break;
1992   }
1993   case OMPRTL__kmpc_reduce_nowait: {
1994     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1995     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1996     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1997     // *lck);
1998     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1999     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2000                                                /*isVarArg=*/false);
2001     llvm::Type *TypeParams[] = {
2002         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2003         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2004         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2005     auto *FnTy =
2006         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2007     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
2008     break;
2009   }
2010   case OMPRTL__kmpc_end_reduce: {
2011     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2012     // kmp_critical_name *lck);
2013     llvm::Type *TypeParams[] = {
2014         getIdentTyPointerTy(), CGM.Int32Ty,
2015         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2016     auto *FnTy =
2017         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2018     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2019     break;
2020   }
2021   case OMPRTL__kmpc_end_reduce_nowait: {
    // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
2024     llvm::Type *TypeParams[] = {
2025         getIdentTyPointerTy(), CGM.Int32Ty,
2026         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2027     auto *FnTy =
2028         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2029     RTLFn =
2030         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2031     break;
2032   }
2033   case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
2036     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2037                                 CGM.VoidPtrTy};
2038     auto *FnTy =
2039         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2040     RTLFn =
2041         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2042     break;
2043   }
2044   case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
2047     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2048                                 CGM.VoidPtrTy};
2049     auto *FnTy =
2050         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2051     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2052                                       /*Name=*/"__kmpc_omp_task_complete_if0");
2053     break;
2054   }
2055   case OMPRTL__kmpc_ordered: {
2056     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2057     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2058     auto *FnTy =
2059         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2060     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2061     break;
2062   }
2063   case OMPRTL__kmpc_end_ordered: {
2064     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2065     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2066     auto *FnTy =
2067         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2068     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2069     break;
2070   }
2071   case OMPRTL__kmpc_omp_taskwait: {
2072     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2073     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2074     auto *FnTy =
2075         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2076     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2077     break;
2078   }
2079   case OMPRTL__kmpc_taskgroup: {
2080     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2081     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2082     auto *FnTy =
2083         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2084     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2085     break;
2086   }
2087   case OMPRTL__kmpc_end_taskgroup: {
2088     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2089     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2090     auto *FnTy =
2091         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2092     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2093     break;
2094   }
2095   case OMPRTL__kmpc_push_proc_bind: {
2096     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2097     // int proc_bind)
2098     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2099     auto *FnTy =
2100         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2101     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2102     break;
2103   }
2104   case OMPRTL__kmpc_omp_task_with_deps: {
2105     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2106     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2107     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2108     llvm::Type *TypeParams[] = {
2109         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2110         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2111     auto *FnTy =
2112         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2113     RTLFn =
2114         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2115     break;
2116   }
2117   case OMPRTL__kmpc_omp_wait_deps: {
2118     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2119     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2120     // kmp_depend_info_t *noalias_dep_list);
2121     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2122                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2123                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2124     auto *FnTy =
2125         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2126     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2127     break;
2128   }
2129   case OMPRTL__kmpc_cancellationpoint: {
2130     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2131     // global_tid, kmp_int32 cncl_kind)
2132     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2133     auto *FnTy =
2134         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2135     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2136     break;
2137   }
2138   case OMPRTL__kmpc_cancel: {
2139     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2140     // kmp_int32 cncl_kind)
2141     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2142     auto *FnTy =
2143         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2144     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2145     break;
2146   }
2147   case OMPRTL__kmpc_push_num_teams: {
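    // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_teams, kmp_int32 num_threads)
    // NOTE(review): the FunctionType below uses Int32Ty as the return type,
    // which disagrees with the 'void' in this prototype - confirm.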
2150     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2151         CGM.Int32Ty};
2152     auto *FnTy =
2153         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2154     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2155     break;
2156   }
2157   case OMPRTL__kmpc_fork_teams: {
2158     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2159     // microtask, ...);
2160     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2161                                 getKmpc_MicroPointerTy()};
2162     auto *FnTy =
2163         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2164     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2165     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2166       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2167         llvm::LLVMContext &Ctx = F->getContext();
2168         llvm::MDBuilder MDB(Ctx);
2169         // Annotate the callback behavior of the __kmpc_fork_teams:
2170         //  - The callback callee is argument number 2 (microtask).
2171         //  - The first two arguments of the callback callee are unknown (-1).
2172         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2173         //    callback callee.
2174         F->addMetadata(
2175             llvm::LLVMContext::MD_callback,
2176             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2177                                         2, {-1, -1},
2178                                         /* VarArgsArePassed */ true)}));
2179       }
2180     }
2181     break;
2182   }
2183   case OMPRTL__kmpc_taskloop: {
2184     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2185     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2186     // sched, kmp_uint64 grainsize, void *task_dup);
2187     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2188                                 CGM.IntTy,
2189                                 CGM.VoidPtrTy,
2190                                 CGM.IntTy,
2191                                 CGM.Int64Ty->getPointerTo(),
2192                                 CGM.Int64Ty->getPointerTo(),
2193                                 CGM.Int64Ty,
2194                                 CGM.IntTy,
2195                                 CGM.IntTy,
2196                                 CGM.Int64Ty,
2197                                 CGM.VoidPtrTy};
2198     auto *FnTy =
2199         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2200     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2201     break;
2202   }
2203   case OMPRTL__kmpc_doacross_init: {
2204     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2205     // num_dims, struct kmp_dim *dims);
2206     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2207                                 CGM.Int32Ty,
2208                                 CGM.Int32Ty,
2209                                 CGM.VoidPtrTy};
2210     auto *FnTy =
2211         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2212     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2213     break;
2214   }
2215   case OMPRTL__kmpc_doacross_fini: {
2216     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2217     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2218     auto *FnTy =
2219         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2220     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2221     break;
2222   }
2223   case OMPRTL__kmpc_doacross_post: {
2224     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2225     // *vec);
2226     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2227                                 CGM.Int64Ty->getPointerTo()};
2228     auto *FnTy =
2229         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2230     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2231     break;
2232   }
2233   case OMPRTL__kmpc_doacross_wait: {
2234     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2235     // *vec);
2236     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2237                                 CGM.Int64Ty->getPointerTo()};
2238     auto *FnTy =
2239         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2240     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2241     break;
2242   }
2243   case OMPRTL__kmpc_task_reduction_init: {
2244     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2245     // *data);
2246     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2247     auto *FnTy =
2248         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2249     RTLFn =
2250         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2251     break;
2252   }
2253   case OMPRTL__kmpc_task_reduction_get_th_data: {
2254     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2255     // *d);
2256     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2257     auto *FnTy =
2258         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2259     RTLFn = CGM.CreateRuntimeFunction(
2260         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2261     break;
2262   }
2263   case OMPRTL__kmpc_alloc: {
    // Build void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
    // al); omp_allocator_handle_t type is void *.
2266     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2267     auto *FnTy =
2268         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2269     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2270     break;
2271   }
2272   case OMPRTL__kmpc_free: {
    // Build void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
    // al); omp_allocator_handle_t type is void *.
2275     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2276     auto *FnTy =
2277         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2278     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2279     break;
2280   }
2281   case OMPRTL__kmpc_push_target_tripcount: {
2282     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2283     // size);
2284     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2285     llvm::FunctionType *FnTy =
2286         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2287     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2288     break;
2289   }
2290   case OMPRTL__tgt_target: {
2291     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2292     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2293     // *arg_types);
2294     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2295                                 CGM.VoidPtrTy,
2296                                 CGM.Int32Ty,
2297                                 CGM.VoidPtrPtrTy,
2298                                 CGM.VoidPtrPtrTy,
2299                                 CGM.Int64Ty->getPointerTo(),
2300                                 CGM.Int64Ty->getPointerTo()};
2301     auto *FnTy =
2302         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2303     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2304     break;
2305   }
2306   case OMPRTL__tgt_target_nowait: {
2307     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2308     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2309     // int64_t *arg_types);
2310     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2311                                 CGM.VoidPtrTy,
2312                                 CGM.Int32Ty,
2313                                 CGM.VoidPtrPtrTy,
2314                                 CGM.VoidPtrPtrTy,
2315                                 CGM.Int64Ty->getPointerTo(),
2316                                 CGM.Int64Ty->getPointerTo()};
2317     auto *FnTy =
2318         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2319     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2320     break;
2321   }
2322   case OMPRTL__tgt_target_teams: {
2323     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2324     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2325     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2326     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2327                                 CGM.VoidPtrTy,
2328                                 CGM.Int32Ty,
2329                                 CGM.VoidPtrPtrTy,
2330                                 CGM.VoidPtrPtrTy,
2331                                 CGM.Int64Ty->getPointerTo(),
2332                                 CGM.Int64Ty->getPointerTo(),
2333                                 CGM.Int32Ty,
2334                                 CGM.Int32Ty};
2335     auto *FnTy =
2336         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2337     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2338     break;
2339   }
2340   case OMPRTL__tgt_target_teams_nowait: {
2341     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2342     // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2343     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2344     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2345                                 CGM.VoidPtrTy,
2346                                 CGM.Int32Ty,
2347                                 CGM.VoidPtrPtrTy,
2348                                 CGM.VoidPtrPtrTy,
2349                                 CGM.Int64Ty->getPointerTo(),
2350                                 CGM.Int64Ty->getPointerTo(),
2351                                 CGM.Int32Ty,
2352                                 CGM.Int32Ty};
2353     auto *FnTy =
2354         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2355     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2356     break;
2357   }
2358   case OMPRTL__tgt_register_requires: {
2359     // Build void __tgt_register_requires(int64_t flags);
2360     llvm::Type *TypeParams[] = {CGM.Int64Ty};
2361     auto *FnTy =
2362         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2363     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2364     break;
2365   }
2366   case OMPRTL__tgt_register_lib: {
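    // Build void __tgt_register_lib(__tgt_bin_desc *desc);
    // NOTE(review): the FunctionType below uses Int32Ty as the return type,
    // which disagrees with the 'void' in this prototype - confirm.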
2368     QualType ParamTy =
2369         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2370     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2371     auto *FnTy =
2372         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2373     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2374     break;
2375   }
2376   case OMPRTL__tgt_unregister_lib: {
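    // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
    // NOTE(review): the FunctionType below uses Int32Ty as the return type,
    // which disagrees with the 'void' in this prototype - confirm.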
2378     QualType ParamTy =
2379         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2380     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2381     auto *FnTy =
2382         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2383     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2384     break;
2385   }
2386   case OMPRTL__tgt_target_data_begin: {
2387     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2388     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2389     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2390                                 CGM.Int32Ty,
2391                                 CGM.VoidPtrPtrTy,
2392                                 CGM.VoidPtrPtrTy,
2393                                 CGM.Int64Ty->getPointerTo(),
2394                                 CGM.Int64Ty->getPointerTo()};
2395     auto *FnTy =
2396         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2397     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2398     break;
2399   }
2400   case OMPRTL__tgt_target_data_begin_nowait: {
2401     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2402     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2403     // *arg_types);
2404     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2405                                 CGM.Int32Ty,
2406                                 CGM.VoidPtrPtrTy,
2407                                 CGM.VoidPtrPtrTy,
2408                                 CGM.Int64Ty->getPointerTo(),
2409                                 CGM.Int64Ty->getPointerTo()};
2410     auto *FnTy =
2411         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2412     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2413     break;
2414   }
2415   case OMPRTL__tgt_target_data_end: {
2416     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2417     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2418     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2419                                 CGM.Int32Ty,
2420                                 CGM.VoidPtrPtrTy,
2421                                 CGM.VoidPtrPtrTy,
2422                                 CGM.Int64Ty->getPointerTo(),
2423                                 CGM.Int64Ty->getPointerTo()};
2424     auto *FnTy =
2425         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2426     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2427     break;
2428   }
2429   case OMPRTL__tgt_target_data_end_nowait: {
2430     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2431     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2432     // *arg_types);
2433     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2434                                 CGM.Int32Ty,
2435                                 CGM.VoidPtrPtrTy,
2436                                 CGM.VoidPtrPtrTy,
2437                                 CGM.Int64Ty->getPointerTo(),
2438                                 CGM.Int64Ty->getPointerTo()};
2439     auto *FnTy =
2440         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2441     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2442     break;
2443   }
2444   case OMPRTL__tgt_target_data_update: {
2445     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2446     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2447     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2448                                 CGM.Int32Ty,
2449                                 CGM.VoidPtrPtrTy,
2450                                 CGM.VoidPtrPtrTy,
2451                                 CGM.Int64Ty->getPointerTo(),
2452                                 CGM.Int64Ty->getPointerTo()};
2453     auto *FnTy =
2454         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2455     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2456     break;
2457   }
2458   case OMPRTL__tgt_target_data_update_nowait: {
2459     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2460     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2461     // *arg_types);
2462     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2463                                 CGM.Int32Ty,
2464                                 CGM.VoidPtrPtrTy,
2465                                 CGM.VoidPtrPtrTy,
2466                                 CGM.Int64Ty->getPointerTo(),
2467                                 CGM.Int64Ty->getPointerTo()};
2468     auto *FnTy =
2469         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2470     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2471     break;
2472   }
2473   case OMPRTL__tgt_mapper_num_components: {
2474     // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
2475     llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
2476     auto *FnTy =
2477         llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
2478     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
2479     break;
2480   }
2481   case OMPRTL__tgt_push_mapper_component: {
2482     // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
2483     // *base, void *begin, int64_t size, int64_t type);
2484     llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
2485                                 CGM.Int64Ty, CGM.Int64Ty};
2486     auto *FnTy =
2487         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2488     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
2489     break;
2490   }
2491   }
2492   assert(RTLFn && "Unable to find OpenMP runtime function");
2493   return RTLFn;
2494 }
2495 
2496 llvm::FunctionCallee
2497 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2498   assert((IVSize == 32 || IVSize == 64) &&
2499          "IV size is not compatible with the omp runtime");
2500   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2501                                             : "__kmpc_for_static_init_4u")
2502                                 : (IVSigned ? "__kmpc_for_static_init_8"
2503                                             : "__kmpc_for_static_init_8u");
2504   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2505   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2506   llvm::Type *TypeParams[] = {
2507     getIdentTyPointerTy(),                     // loc
2508     CGM.Int32Ty,                               // tid
2509     CGM.Int32Ty,                               // schedtype
2510     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2511     PtrTy,                                     // p_lower
2512     PtrTy,                                     // p_upper
2513     PtrTy,                                     // p_stride
2514     ITy,                                       // incr
2515     ITy                                        // chunk
2516   };
2517   auto *FnTy =
2518       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2519   return CGM.CreateRuntimeFunction(FnTy, Name);
2520 }
2521 
2522 llvm::FunctionCallee
2523 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2524   assert((IVSize == 32 || IVSize == 64) &&
2525          "IV size is not compatible with the omp runtime");
2526   StringRef Name =
2527       IVSize == 32
2528           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2529           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2530   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2531   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2532                                CGM.Int32Ty,           // tid
2533                                CGM.Int32Ty,           // schedtype
2534                                ITy,                   // lower
2535                                ITy,                   // upper
2536                                ITy,                   // stride
2537                                ITy                    // chunk
2538   };
2539   auto *FnTy =
2540       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2541   return CGM.CreateRuntimeFunction(FnTy, Name);
2542 }
2543 
2544 llvm::FunctionCallee
2545 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2546   assert((IVSize == 32 || IVSize == 64) &&
2547          "IV size is not compatible with the omp runtime");
2548   StringRef Name =
2549       IVSize == 32
2550           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2551           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2552   llvm::Type *TypeParams[] = {
2553       getIdentTyPointerTy(), // loc
2554       CGM.Int32Ty,           // tid
2555   };
2556   auto *FnTy =
2557       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2558   return CGM.CreateRuntimeFunction(FnTy, Name);
2559 }
2560 
2561 llvm::FunctionCallee
2562 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2563   assert((IVSize == 32 || IVSize == 64) &&
2564          "IV size is not compatible with the omp runtime");
2565   StringRef Name =
2566       IVSize == 32
2567           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2568           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2569   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2570   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2571   llvm::Type *TypeParams[] = {
2572     getIdentTyPointerTy(),                     // loc
2573     CGM.Int32Ty,                               // tid
2574     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2575     PtrTy,                                     // p_lower
2576     PtrTy,                                     // p_upper
2577     PtrTy                                      // p_stride
2578   };
2579   auto *FnTy =
2580       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2581   return CGM.CreateRuntimeFunction(FnTy, Name);
2582 }
2583 
2584 /// Obtain information that uniquely identifies a target entry. This
2585 /// consists of the file and device IDs as well as line number associated with
2586 /// the relevant entry source location.
2587 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2588                                      unsigned &DeviceID, unsigned &FileID,
2589                                      unsigned &LineNum) {
2590   SourceManager &SM = C.getSourceManager();
2591 
2592   // The loc should be always valid and have a file ID (the user cannot use
2593   // #pragma directives in macros)
2594 
2595   assert(Loc.isValid() && "Source location is expected to be always valid.");
2596 
2597   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2598   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2599 
2600   llvm::sys::fs::UniqueID ID;
2601   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2602     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2603         << PLoc.getFilename() << EC.message();
2604 
2605   DeviceID = ID.getDevice();
2606   FileID = ID.getFile();
2607   LineNum = PLoc.getLine();
2608 }
2609 
2610 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
2611   if (CGM.getLangOpts().OpenMPSimd)
2612     return Address::invalid();
2613   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2614       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2615   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
2616               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2617                HasRequiresUnifiedSharedMemory))) {
2618     SmallString<64> PtrName;
2619     {
2620       llvm::raw_svector_ostream OS(PtrName);
2621       OS << CGM.getMangledName(GlobalDecl(VD));
2622       if (!VD->isExternallyVisible()) {
2623         unsigned DeviceID, FileID, Line;
2624         getTargetEntryUniqueInfo(CGM.getContext(),
2625                                  VD->getCanonicalDecl()->getBeginLoc(),
2626                                  DeviceID, FileID, Line);
2627         OS << llvm::format("_%x", FileID);
2628       }
2629       OS << "_decl_tgt_ref_ptr";
2630     }
2631     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2632     if (!Ptr) {
2633       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2634       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2635                                         PtrName);
2636 
2637       auto *GV = cast<llvm::GlobalVariable>(Ptr);
2638       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
2639 
2640       if (!CGM.getLangOpts().OpenMPIsDevice)
2641         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2642       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2643     }
2644     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2645   }
2646   return Address::invalid();
2647 }
2648 
2649 llvm::Constant *
2650 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2651   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2652          !CGM.getContext().getTargetInfo().isTLSSupported());
2653   // Lookup the entry, lazily creating it if necessary.
2654   std::string Suffix = getName({"cache", ""});
2655   return getOrCreateInternalVariable(
2656       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2657 }
2658 
2659 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2660                                                 const VarDecl *VD,
2661                                                 Address VDAddr,
2662                                                 SourceLocation Loc) {
2663   if (CGM.getLangOpts().OpenMPUseTLS &&
2664       CGM.getContext().getTargetInfo().isTLSSupported())
2665     return VDAddr;
2666 
2667   llvm::Type *VarTy = VDAddr.getElementType();
2668   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2669                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2670                                                        CGM.Int8PtrTy),
2671                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2672                          getOrCreateThreadPrivateCache(VD)};
2673   return Address(CGF.EmitRuntimeCall(
2674       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2675                  VDAddr.getAlignment());
2676 }
2677 
2678 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2679     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2680     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2681   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2682   // library.
2683   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2684   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2685                       OMPLoc);
2686   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2687   // to register constructor/destructor for variable.
2688   llvm::Value *Args[] = {
2689       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2690       Ctor, CopyCtor, Dtor};
2691   CGF.EmitRuntimeCall(
2692       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2693 }
2694 
2695 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
2696     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2697     bool PerformInit, CodeGenFunction *CGF) {
2698   if (CGM.getLangOpts().OpenMPUseTLS &&
2699       CGM.getContext().getTargetInfo().isTLSSupported())
2700     return nullptr;
2701 
2702   VD = VD->getDefinition(CGM.getContext());
2703   if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
2704     QualType ASTTy = VD->getType();
2705 
2706     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2707     const Expr *Init = VD->getAnyInitializer();
2708     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2709       // Generate function that re-emits the declaration's initializer into the
2710       // threadprivate copy of the variable VD
2711       CodeGenFunction CtorCGF(CGM);
2712       FunctionArgList Args;
2713       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2714                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2715                             ImplicitParamDecl::Other);
2716       Args.push_back(&Dst);
2717 
2718       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2719           CGM.getContext().VoidPtrTy, Args);
2720       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2721       std::string Name = getName({"__kmpc_global_ctor_", ""});
2722       llvm::Function *Fn =
2723           CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2724       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2725                             Args, Loc, Loc);
2726       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
2727           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2728           CGM.getContext().VoidPtrTy, Dst.getLocation());
2729       Address Arg = Address(ArgVal, VDAddr.getAlignment());
2730       Arg = CtorCGF.Builder.CreateElementBitCast(
2731           Arg, CtorCGF.ConvertTypeForMem(ASTTy));
2732       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2733                                /*IsInitializer=*/true);
2734       ArgVal = CtorCGF.EmitLoadOfScalar(
2735           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2736           CGM.getContext().VoidPtrTy, Dst.getLocation());
2737       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2738       CtorCGF.FinishFunction();
2739       Ctor = Fn;
2740     }
2741     if (VD->getType().isDestructedType() != QualType::DK_none) {
2742       // Generate function that emits destructor call for the threadprivate copy
2743       // of the variable VD
2744       CodeGenFunction DtorCGF(CGM);
2745       FunctionArgList Args;
2746       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2747                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2748                             ImplicitParamDecl::Other);
2749       Args.push_back(&Dst);
2750 
2751       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2752           CGM.getContext().VoidTy, Args);
2753       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2754       std::string Name = getName({"__kmpc_global_dtor_", ""});
2755       llvm::Function *Fn =
2756           CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2757       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2758       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2759                             Loc, Loc);
2760       // Create a scope with an artificial location for the body of this function.
2761       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2762       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
2763           DtorCGF.GetAddrOfLocalVar(&Dst),
2764           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2765       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2766                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2767                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2768       DtorCGF.FinishFunction();
2769       Dtor = Fn;
2770     }
2771     // Do not emit init function if it is not required.
2772     if (!Ctor && !Dtor)
2773       return nullptr;
2774 
2775     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2776     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2777                                                /*isVarArg=*/false)
2778                            ->getPointerTo();
2779     // Copying constructor for the threadprivate variable.
2780     // Must be NULL - reserved by runtime, but currently it requires that this
2781     // parameter is always NULL. Otherwise it fires assertion.
2782     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2783     if (Ctor == nullptr) {
2784       auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2785                                              /*isVarArg=*/false)
2786                          ->getPointerTo();
2787       Ctor = llvm::Constant::getNullValue(CtorTy);
2788     }
2789     if (Dtor == nullptr) {
2790       auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2791                                              /*isVarArg=*/false)
2792                          ->getPointerTo();
2793       Dtor = llvm::Constant::getNullValue(DtorTy);
2794     }
2795     if (!CGF) {
2796       auto *InitFunctionTy =
2797           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2798       std::string Name = getName({"__omp_threadprivate_init_", ""});
2799       llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2800           InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
2801       CodeGenFunction InitCGF(CGM);
2802       FunctionArgList ArgList;
2803       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2804                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
2805                             Loc, Loc);
2806       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2807       InitCGF.FinishFunction();
2808       return InitFunction;
2809     }
2810     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2811   }
2812   return nullptr;
2813 }
2814 
2815 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2816                                                      llvm::GlobalVariable *Addr,
2817                                                      bool PerformInit) {
2818   Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2819       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2820   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
2821       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2822        HasRequiresUnifiedSharedMemory))
2823     return CGM.getLangOpts().OpenMPIsDevice;
2824   VD = VD->getDefinition(CGM.getContext());
2825   if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
2826     return CGM.getLangOpts().OpenMPIsDevice;
2827 
2828   QualType ASTTy = VD->getType();
2829 
2830   SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
2831   // Produce the unique prefix to identify the new target regions. We use
2832   // the source location of the variable declaration which we know to not
2833   // conflict with any target region.
2834   unsigned DeviceID;
2835   unsigned FileID;
2836   unsigned Line;
2837   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2838   SmallString<128> Buffer, Out;
2839   {
2840     llvm::raw_svector_ostream OS(Buffer);
2841     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2842        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2843   }
2844 
2845   const Expr *Init = VD->getAnyInitializer();
2846   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2847     llvm::Constant *Ctor;
2848     llvm::Constant *ID;
2849     if (CGM.getLangOpts().OpenMPIsDevice) {
2850       // Generate function that re-emits the declaration's initializer into
2851       // the threadprivate copy of the variable VD
2852       CodeGenFunction CtorCGF(CGM);
2853 
2854       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2855       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2856       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2857           FTy, Twine(Buffer, "_ctor"), FI, Loc);
2858       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2859       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2860                             FunctionArgList(), Loc, Loc);
2861       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2862       CtorCGF.EmitAnyExprToMem(Init,
2863                                Address(Addr, CGM.getContext().getDeclAlign(VD)),
2864                                Init->getType().getQualifiers(),
2865                                /*IsInitializer=*/true);
2866       CtorCGF.FinishFunction();
2867       Ctor = Fn;
2868       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2869       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
2870     } else {
2871       Ctor = new llvm::GlobalVariable(
2872           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2873           llvm::GlobalValue::PrivateLinkage,
2874           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2875       ID = Ctor;
2876     }
2877 
2878     // Register the information for the entry associated with the constructor.
2879     Out.clear();
2880     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2881         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2882         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
2883   }
2884   if (VD->getType().isDestructedType() != QualType::DK_none) {
2885     llvm::Constant *Dtor;
2886     llvm::Constant *ID;
2887     if (CGM.getLangOpts().OpenMPIsDevice) {
2888       // Generate function that emits destructor call for the threadprivate
2889       // copy of the variable VD
2890       CodeGenFunction DtorCGF(CGM);
2891 
2892       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2893       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2894       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2895           FTy, Twine(Buffer, "_dtor"), FI, Loc);
2896       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2897       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2898                             FunctionArgList(), Loc, Loc);
2899       // Create a scope with an artificial location for the body of this
2900       // function.
2901       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2902       DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
2903                           ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2904                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2905       DtorCGF.FinishFunction();
2906       Dtor = Fn;
2907       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2908       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
2909     } else {
2910       Dtor = new llvm::GlobalVariable(
2911           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2912           llvm::GlobalValue::PrivateLinkage,
2913           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
2914       ID = Dtor;
2915     }
2916     // Register the information for the entry associated with the destructor.
2917     Out.clear();
2918     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2919         DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
2920         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
2921   }
2922   return CGM.getLangOpts().OpenMPIsDevice;
2923 }
2924 
2925 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2926                                                           QualType VarType,
2927                                                           StringRef Name) {
2928   std::string Suffix = getName({"artificial", ""});
2929   std::string CacheSuffix = getName({"cache", ""});
2930   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2931   llvm::Value *GAddr =
2932       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2933   llvm::Value *Args[] = {
2934       emitUpdateLocation(CGF, SourceLocation()),
2935       getThreadID(CGF, SourceLocation()),
2936       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2937       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2938                                 /*isSigned=*/false),
2939       getOrCreateInternalVariable(
2940           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2941   return Address(
2942       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2943           CGF.EmitRuntimeCall(
2944               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2945           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2946       CGM.getPointerAlign());
2947 }
2948 
2949 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2950                                       const RegionCodeGenTy &ThenGen,
2951                                       const RegionCodeGenTy &ElseGen) {
2952   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2953 
2954   // If the condition constant folds and can be elided, try to avoid emitting
2955   // the condition and the dead arm of the if/else.
2956   bool CondConstant;
2957   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2958     if (CondConstant)
2959       ThenGen(CGF);
2960     else
2961       ElseGen(CGF);
2962     return;
2963   }
2964 
2965   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2966   // emit the conditional branch.
2967   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2968   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2969   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2970   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2971 
2972   // Emit the 'then' code.
2973   CGF.EmitBlock(ThenBlock);
2974   ThenGen(CGF);
2975   CGF.EmitBranch(ContBlock);
2976   // Emit the 'else' code if present.
2977   // There is no need to emit line number for unconditional branch.
2978   (void)ApplyDebugLocation::CreateEmpty(CGF);
2979   CGF.EmitBlock(ElseBlock);
2980   ElseGen(CGF);
2981   // There is no need to emit line number for unconditional branch.
2982   (void)ApplyDebugLocation::CreateEmpty(CGF);
2983   CGF.EmitBranch(ContBlock);
2984   // Emit the continuation block for code after the if.
2985   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2986 }
2987 
2988 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2989                                        llvm::Function *OutlinedFn,
2990                                        ArrayRef<llvm::Value *> CapturedVars,
2991                                        const Expr *IfCond) {
2992   if (!CGF.HaveInsertPoint())
2993     return;
2994   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2995   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
2996                                                      PrePostActionTy &) {
2997     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2998     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2999     llvm::Value *Args[] = {
3000         RTLoc,
3001         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
3002         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
3003     llvm::SmallVector<llvm::Value *, 16> RealArgs;
3004     RealArgs.append(std::begin(Args), std::end(Args));
3005     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
3006 
3007     llvm::FunctionCallee RTLFn =
3008         RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
3009     CGF.EmitRuntimeCall(RTLFn, RealArgs);
3010   };
3011   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
3012                                                           PrePostActionTy &) {
3013     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
3014     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
3015     // Build calls:
3016     // __kmpc_serialized_parallel(&Loc, GTid);
3017     llvm::Value *Args[] = {RTLoc, ThreadID};
3018     CGF.EmitRuntimeCall(
3019         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
3020 
3021     // OutlinedFn(&GTid, &zero, CapturedStruct);
3022     Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
3023                                                         /*Name*/ ".zero.addr");
3024     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
3025     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
3026     // ThreadId for serialized parallels is 0.
3027     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
3028     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
3029     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
3030     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
3031 
3032     // __kmpc_end_serialized_parallel(&Loc, GTid);
3033     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
3034     CGF.EmitRuntimeCall(
3035         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
3036         EndArgs);
3037   };
3038   if (IfCond) {
3039     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
3040   } else {
3041     RegionCodeGenTy ThenRCG(ThenGen);
3042     ThenRCG(CGF);
3043   }
3044 }
3045 
3046 // If we're inside an (outlined) parallel region, use the region info's
3047 // thread-ID variable (it is passed in a first argument of the outlined function
3048 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3049 // regular serial code region, get thread ID by calling kmp_int32
3050 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3051 // return the address of that temp.
3052 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3053                                              SourceLocation Loc) {
3054   if (auto *OMPRegionInfo =
3055           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3056     if (OMPRegionInfo->getThreadIDVariable())
3057       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
3058 
3059   llvm::Value *ThreadID = getThreadID(CGF, Loc);
3060   QualType Int32Ty =
3061       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3062   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3063   CGF.EmitStoreOfScalar(ThreadID,
3064                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3065 
3066   return ThreadIDTemp;
3067 }
3068 
3069 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3070     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3071   SmallString<256> Buffer;
3072   llvm::raw_svector_ostream Out(Buffer);
3073   Out << Name;
3074   StringRef RuntimeName = Out.str();
3075   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3076   if (Elem.second) {
3077     assert(Elem.second->getType()->getPointerElementType() == Ty &&
3078            "OMP internal variable has different type than requested");
3079     return &*Elem.second;
3080   }
3081 
3082   return Elem.second = new llvm::GlobalVariable(
3083              CGM.getModule(), Ty, /*IsConstant*/ false,
3084              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3085              Elem.first(), /*InsertBefore=*/nullptr,
3086              llvm::GlobalValue::NotThreadLocal, AddressSpace);
3087 }
3088 
3089 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3090   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3091   std::string Name = getName({Prefix, "var"});
3092   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3093 }
3094 
3095 namespace {
3096 /// Common pre(post)-action for different OpenMP constructs.
3097 class CommonActionTy final : public PrePostActionTy {
3098   llvm::FunctionCallee EnterCallee;
3099   ArrayRef<llvm::Value *> EnterArgs;
3100   llvm::FunctionCallee ExitCallee;
3101   ArrayRef<llvm::Value *> ExitArgs;
3102   bool Conditional;
3103   llvm::BasicBlock *ContBlock = nullptr;
3104 
3105 public:
3106   CommonActionTy(llvm::FunctionCallee EnterCallee,
3107                  ArrayRef<llvm::Value *> EnterArgs,
3108                  llvm::FunctionCallee ExitCallee,
3109                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3110       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3111         ExitArgs(ExitArgs), Conditional(Conditional) {}
3112   void Enter(CodeGenFunction &CGF) override {
3113     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3114     if (Conditional) {
3115       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3116       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3117       ContBlock = CGF.createBasicBlock("omp_if.end");
3118       // Generate the branch (If-stmt)
3119       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3120       CGF.EmitBlock(ThenBlock);
3121     }
3122   }
3123   void Done(CodeGenFunction &CGF) {
3124     // Emit the rest of blocks/branches
3125     CGF.EmitBranch(ContBlock);
3126     CGF.EmitBlock(ContBlock, true);
3127   }
3128   void Exit(CodeGenFunction &CGF) override {
3129     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3130   }
3131 };
3132 } // anonymous namespace
3133 
3134 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3135                                          StringRef CriticalName,
3136                                          const RegionCodeGenTy &CriticalOpGen,
3137                                          SourceLocation Loc, const Expr *Hint) {
3138   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3139   // CriticalOpGen();
3140   // __kmpc_end_critical(ident_t *, gtid, Lock);
3141   // Prepare arguments and build a call to __kmpc_critical
3142   if (!CGF.HaveInsertPoint())
3143     return;
3144   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3145                          getCriticalRegionLock(CriticalName)};
3146   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3147                                                 std::end(Args));
3148   if (Hint) {
3149     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3150         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3151   }
3152   CommonActionTy Action(
3153       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3154                                  : OMPRTL__kmpc_critical),
3155       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3156   CriticalOpGen.setAction(Action);
3157   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3158 }
3159 
3160 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3161                                        const RegionCodeGenTy &MasterOpGen,
3162                                        SourceLocation Loc) {
3163   if (!CGF.HaveInsertPoint())
3164     return;
3165   // if(__kmpc_master(ident_t *, gtid)) {
3166   //   MasterOpGen();
3167   //   __kmpc_end_master(ident_t *, gtid);
3168   // }
3169   // Prepare arguments and build a call to __kmpc_master
3170   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3171   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3172                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3173                         /*Conditional=*/true);
3174   MasterOpGen.setAction(Action);
3175   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3176   Action.Done(CGF);
3177 }
3178 
3179 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3180                                         SourceLocation Loc) {
3181   if (!CGF.HaveInsertPoint())
3182     return;
3183   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3184   llvm::Value *Args[] = {
3185       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3186       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3187   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3188   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3189     Region->emitUntiedSwitch(CGF);
3190 }
3191 
3192 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3193                                           const RegionCodeGenTy &TaskgroupOpGen,
3194                                           SourceLocation Loc) {
3195   if (!CGF.HaveInsertPoint())
3196     return;
3197   // __kmpc_taskgroup(ident_t *, gtid);
3198   // TaskgroupOpGen();
3199   // __kmpc_end_taskgroup(ident_t *, gtid);
3200   // Prepare arguments and build a call to __kmpc_taskgroup
3201   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3202   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3203                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3204                         Args);
3205   TaskgroupOpGen.setAction(Action);
3206   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3207 }
3208 
3209 /// Given an array of pointers to variables, project the address of a
3210 /// given variable.
3211 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3212                                       unsigned Index, const VarDecl *Var) {
3213   // Pull out the pointer to the variable.
3214   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3215   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3216 
3217   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3218   Addr = CGF.Builder.CreateElementBitCast(
3219       Addr, CGF.ConvertTypeForMem(Var->getType()));
3220   return Addr;
3221 }
3222 
3223 static llvm::Value *emitCopyprivateCopyFunction(
3224     CodeGenModule &CGM, llvm::Type *ArgsType,
3225     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3226     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3227     SourceLocation Loc) {
3228   ASTContext &C = CGM.getContext();
3229   // void copy_func(void *LHSArg, void *RHSArg);
3230   FunctionArgList Args;
3231   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3232                            ImplicitParamDecl::Other);
3233   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3234                            ImplicitParamDecl::Other);
3235   Args.push_back(&LHSArg);
3236   Args.push_back(&RHSArg);
3237   const auto &CGFI =
3238       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3239   std::string Name =
3240       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3241   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3242                                     llvm::GlobalValue::InternalLinkage, Name,
3243                                     &CGM.getModule());
3244   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3245   Fn->setDoesNotRecurse();
3246   CodeGenFunction CGF(CGM);
3247   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3248   // Dest = (void*[n])(LHSArg);
3249   // Src = (void*[n])(RHSArg);
3250   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3251       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3252       ArgsType), CGF.getPointerAlign());
3253   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3254       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3255       ArgsType), CGF.getPointerAlign());
3256   // *(Type0*)Dst[0] = *(Type0*)Src[0];
3257   // *(Type1*)Dst[1] = *(Type1*)Src[1];
3258   // ...
3259   // *(Typen*)Dst[n] = *(Typen*)Src[n];
3260   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3261     const auto *DestVar =
3262         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3263     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3264 
3265     const auto *SrcVar =
3266         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3267     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3268 
3269     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3270     QualType Type = VD->getType();
3271     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3272   }
3273   CGF.FinishFunction();
3274   return Fn;
3275 }
3276 
3277 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
3278                                        const RegionCodeGenTy &SingleOpGen,
3279                                        SourceLocation Loc,
3280                                        ArrayRef<const Expr *> CopyprivateVars,
3281                                        ArrayRef<const Expr *> SrcExprs,
3282                                        ArrayRef<const Expr *> DstExprs,
3283                                        ArrayRef<const Expr *> AssignmentOps) {
3284   if (!CGF.HaveInsertPoint())
3285     return;
3286   assert(CopyprivateVars.size() == SrcExprs.size() &&
3287          CopyprivateVars.size() == DstExprs.size() &&
3288          CopyprivateVars.size() == AssignmentOps.size());
3289   ASTContext &C = CGM.getContext();
3290   // int32 did_it = 0;
3291   // if(__kmpc_single(ident_t *, gtid)) {
3292   //   SingleOpGen();
3293   //   __kmpc_end_single(ident_t *, gtid);
3294   //   did_it = 1;
3295   // }
3296   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3297   // <copy_func>, did_it);
3298 
3299   Address DidIt = Address::invalid();
3300   if (!CopyprivateVars.empty()) {
3301     // int32 did_it = 0;
3302     QualType KmpInt32Ty =
3303         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3304     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3305     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3306   }
3307   // Prepare arguments and build a call to __kmpc_single
3308   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3309   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3310                         createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
3311                         /*Conditional=*/true);
3312   SingleOpGen.setAction(Action);
3313   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
3314   if (DidIt.isValid()) {
3315     // did_it = 1;
3316     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3317   }
3318   Action.Done(CGF);
3319   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3320   // <copy_func>, did_it);
3321   if (DidIt.isValid()) {
3322     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3323     QualType CopyprivateArrayTy =
3324         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
3325                                /*IndexTypeQuals=*/0);
3326     // Create a list of all private variables for copyprivate.
3327     Address CopyprivateList =
3328         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3329     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
3330       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
3331       CGF.Builder.CreateStore(
3332           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3333               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
3334           Elem);
3335     }
3336     // Build function that copies private values from single region to all other
3337     // threads in the corresponding parallel region.
3338     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
3339         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3340         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3341     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3342     Address CL =
3343       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3344                                                       CGF.VoidPtrTy);
3345     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
3346     llvm::Value *Args[] = {
3347         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3348         getThreadID(CGF, Loc),        // i32 <gtid>
3349         BufSize,                      // size_t <buf_size>
3350         CL.getPointer(),              // void *<copyprivate list>
3351         CpyFn,                        // void (*) (void *, void *) <copy_func>
3352         DidItVal                      // i32 did_it
3353     };
3354     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
3355   }
3356 }
3357 
3358 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3359                                         const RegionCodeGenTy &OrderedOpGen,
3360                                         SourceLocation Loc, bool IsThreads) {
3361   if (!CGF.HaveInsertPoint())
3362     return;
3363   // __kmpc_ordered(ident_t *, gtid);
3364   // OrderedOpGen();
3365   // __kmpc_end_ordered(ident_t *, gtid);
3366   // Prepare arguments and build a call to __kmpc_ordered
3367   if (IsThreads) {
3368     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3369     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3370                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3371                           Args);
3372     OrderedOpGen.setAction(Action);
3373     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3374     return;
3375   }
3376   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3377 }
3378 
3379 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3380   unsigned Flags;
3381   if (Kind == OMPD_for)
3382     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3383   else if (Kind == OMPD_sections)
3384     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3385   else if (Kind == OMPD_single)
3386     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3387   else if (Kind == OMPD_barrier)
3388     Flags = OMP_IDENT_BARRIER_EXPL;
3389   else
3390     Flags = OMP_IDENT_BARRIER_IMPL;
3391   return Flags;
3392 }
3393 
3394 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3395     CodeGenFunction &CGF, const OMPLoopDirective &S,
3396     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3397   // Check if the loop directive is actually a doacross loop directive. In this
3398   // case choose static, 1 schedule.
3399   if (llvm::any_of(
3400           S.getClausesOfKind<OMPOrderedClause>(),
3401           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3402     ScheduleKind = OMPC_SCHEDULE_static;
3403     // Chunk size is 1 in this case.
3404     llvm::APInt ChunkSize(32, 1);
3405     ChunkExpr = IntegerLiteral::Create(
3406         CGF.getContext(), ChunkSize,
3407         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3408         SourceLocation());
3409   }
3410 }
3411 
3412 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3413                                       OpenMPDirectiveKind Kind, bool EmitChecks,
3414                                       bool ForceSimpleCall) {
3415   if (!CGF.HaveInsertPoint())
3416     return;
3417   // Build call __kmpc_cancel_barrier(loc, thread_id);
3418   // Build call __kmpc_barrier(loc, thread_id);
3419   unsigned Flags = getDefaultFlagsForBarriers(Kind);
3420   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3421   // thread_id);
3422   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3423                          getThreadID(CGF, Loc)};
3424   if (auto *OMPRegionInfo =
3425           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3426     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3427       llvm::Value *Result = CGF.EmitRuntimeCall(
3428           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3429       if (EmitChecks) {
3430         // if (__kmpc_cancel_barrier()) {
3431         //   exit from construct;
3432         // }
3433         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3434         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3435         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3436         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3437         CGF.EmitBlock(ExitBB);
3438         //   exit from construct;
3439         CodeGenFunction::JumpDest CancelDestination =
3440             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3441         CGF.EmitBranchThroughCleanup(CancelDestination);
3442         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3443       }
3444       return;
3445     }
3446   }
3447   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3448 }
3449 
3450 /// Map the OpenMP loop schedule to the runtime enumeration.
3451 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3452                                           bool Chunked, bool Ordered) {
3453   switch (ScheduleKind) {
3454   case OMPC_SCHEDULE_static:
3455     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3456                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3457   case OMPC_SCHEDULE_dynamic:
3458     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3459   case OMPC_SCHEDULE_guided:
3460     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3461   case OMPC_SCHEDULE_runtime:
3462     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3463   case OMPC_SCHEDULE_auto:
3464     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3465   case OMPC_SCHEDULE_unknown:
3466     assert(!Chunked && "chunk was specified but schedule kind not known");
3467     return Ordered ? OMP_ord_static : OMP_sch_static;
3468   }
3469   llvm_unreachable("Unexpected runtime schedule");
3470 }
3471 
3472 /// Map the OpenMP distribute schedule to the runtime enumeration.
3473 static OpenMPSchedType
3474 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3475   // only static is allowed for dist_schedule
3476   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3477 }
3478 
3479 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3480                                          bool Chunked) const {
3481   OpenMPSchedType Schedule =
3482       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3483   return Schedule == OMP_sch_static;
3484 }
3485 
3486 bool CGOpenMPRuntime::isStaticNonchunked(
3487     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3488   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3489   return Schedule == OMP_dist_sch_static;
3490 }
3491 
3492 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3493                                       bool Chunked) const {
3494   OpenMPSchedType Schedule =
3495       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3496   return Schedule == OMP_sch_static_chunked;
3497 }
3498 
3499 bool CGOpenMPRuntime::isStaticChunked(
3500     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3501   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3502   return Schedule == OMP_dist_sch_static_chunked;
3503 }
3504 
3505 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3506   OpenMPSchedType Schedule =
3507       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3508   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3509   return Schedule != OMP_sch_static;
3510 }
3511 
3512 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
3513                                   OpenMPScheduleClauseModifier M1,
3514                                   OpenMPScheduleClauseModifier M2) {
3515   int Modifier = 0;
3516   switch (M1) {
3517   case OMPC_SCHEDULE_MODIFIER_monotonic:
3518     Modifier = OMP_sch_modifier_monotonic;
3519     break;
3520   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3521     Modifier = OMP_sch_modifier_nonmonotonic;
3522     break;
3523   case OMPC_SCHEDULE_MODIFIER_simd:
3524     if (Schedule == OMP_sch_static_chunked)
3525       Schedule = OMP_sch_static_balanced_chunked;
3526     break;
3527   case OMPC_SCHEDULE_MODIFIER_last:
3528   case OMPC_SCHEDULE_MODIFIER_unknown:
3529     break;
3530   }
3531   switch (M2) {
3532   case OMPC_SCHEDULE_MODIFIER_monotonic:
3533     Modifier = OMP_sch_modifier_monotonic;
3534     break;
3535   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3536     Modifier = OMP_sch_modifier_nonmonotonic;
3537     break;
3538   case OMPC_SCHEDULE_MODIFIER_simd:
3539     if (Schedule == OMP_sch_static_chunked)
3540       Schedule = OMP_sch_static_balanced_chunked;
3541     break;
3542   case OMPC_SCHEDULE_MODIFIER_last:
3543   case OMPC_SCHEDULE_MODIFIER_unknown:
3544     break;
3545   }
3546   return Schedule | Modifier;
3547 }
3548 
3549 void CGOpenMPRuntime::emitForDispatchInit(
3550     CodeGenFunction &CGF, SourceLocation Loc,
3551     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3552     bool Ordered, const DispatchRTInput &DispatchValues) {
3553   if (!CGF.HaveInsertPoint())
3554     return;
3555   OpenMPSchedType Schedule = getRuntimeSchedule(
3556       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3557   assert(Ordered ||
3558          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3559           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3560           Schedule != OMP_sch_static_balanced_chunked));
3561   // Call __kmpc_dispatch_init(
3562   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3563   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
3564   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
3565 
3566   // If the Chunk was not specified in the clause - use default value 1.
3567   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3568                                             : CGF.Builder.getIntN(IVSize, 1);
3569   llvm::Value *Args[] = {
3570       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3571       CGF.Builder.getInt32(addMonoNonMonoModifier(
3572           Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3573       DispatchValues.LB,                                // Lower
3574       DispatchValues.UB,                                // Upper
3575       CGF.Builder.getIntN(IVSize, 1),                   // Stride
3576       Chunk                                             // Chunk
3577   };
3578   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3579 }
3580 
3581 static void emitForStaticInitCall(
3582     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3583     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
3584     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3585     const CGOpenMPRuntime::StaticRTInput &Values) {
3586   if (!CGF.HaveInsertPoint())
3587     return;
3588 
3589   assert(!Values.Ordered);
3590   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3591          Schedule == OMP_sch_static_balanced_chunked ||
3592          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3593          Schedule == OMP_dist_sch_static ||
3594          Schedule == OMP_dist_sch_static_chunked);
3595 
3596   // Call __kmpc_for_static_init(
3597   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3598   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3599   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3600   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
3601   llvm::Value *Chunk = Values.Chunk;
3602   if (Chunk == nullptr) {
3603     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3604             Schedule == OMP_dist_sch_static) &&
3605            "expected static non-chunked schedule");
3606     // If the Chunk was not specified in the clause - use default value 1.
3607     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3608   } else {
3609     assert((Schedule == OMP_sch_static_chunked ||
3610             Schedule == OMP_sch_static_balanced_chunked ||
3611             Schedule == OMP_ord_static_chunked ||
3612             Schedule == OMP_dist_sch_static_chunked) &&
3613            "expected static chunked schedule");
3614   }
3615   llvm::Value *Args[] = {
3616       UpdateLocation,
3617       ThreadId,
3618       CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
3619                                                   M2)), // Schedule type
3620       Values.IL.getPointer(),                           // &isLastIter
3621       Values.LB.getPointer(),                           // &LB
3622       Values.UB.getPointer(),                           // &UB
3623       Values.ST.getPointer(),                           // &Stride
3624       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
3625       Chunk                                             // Chunk
3626   };
3627   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3628 }
3629 
3630 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3631                                         SourceLocation Loc,
3632                                         OpenMPDirectiveKind DKind,
3633                                         const OpenMPScheduleTy &ScheduleKind,
3634                                         const StaticRTInput &Values) {
3635   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3636       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3637   assert(isOpenMPWorksharingDirective(DKind) &&
3638          "Expected loop-based or sections-based directive.");
3639   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3640                                              isOpenMPLoopDirective(DKind)
3641                                                  ? OMP_IDENT_WORK_LOOP
3642                                                  : OMP_IDENT_WORK_SECTIONS);
3643   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3644   llvm::FunctionCallee StaticInitFunction =
3645       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3646   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3647                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3648 }
3649 
3650 void CGOpenMPRuntime::emitDistributeStaticInit(
3651     CodeGenFunction &CGF, SourceLocation Loc,
3652     OpenMPDistScheduleClauseKind SchedKind,
3653     const CGOpenMPRuntime::StaticRTInput &Values) {
3654   OpenMPSchedType ScheduleNum =
3655       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3656   llvm::Value *UpdatedLocation =
3657       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3658   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3659   llvm::FunctionCallee StaticInitFunction =
3660       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3661   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3662                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3663                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3664 }
3665 
3666 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3667                                           SourceLocation Loc,
3668                                           OpenMPDirectiveKind DKind) {
3669   if (!CGF.HaveInsertPoint())
3670     return;
3671   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3672   llvm::Value *Args[] = {
3673       emitUpdateLocation(CGF, Loc,
3674                          isOpenMPDistributeDirective(DKind)
3675                              ? OMP_IDENT_WORK_DISTRIBUTE
3676                              : isOpenMPLoopDirective(DKind)
3677                                    ? OMP_IDENT_WORK_LOOP
3678                                    : OMP_IDENT_WORK_SECTIONS),
3679       getThreadID(CGF, Loc)};
3680   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3681                       Args);
3682 }
3683 
3684 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3685                                                  SourceLocation Loc,
3686                                                  unsigned IVSize,
3687                                                  bool IVSigned) {
3688   if (!CGF.HaveInsertPoint())
3689     return;
3690   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3691   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3692   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3693 }
3694 
3695 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3696                                           SourceLocation Loc, unsigned IVSize,
3697                                           bool IVSigned, Address IL,
3698                                           Address LB, Address UB,
3699                                           Address ST) {
3700   // Call __kmpc_dispatch_next(
3701   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3702   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3703   //          kmp_int[32|64] *p_stride);
3704   llvm::Value *Args[] = {
3705       emitUpdateLocation(CGF, Loc),
3706       getThreadID(CGF, Loc),
3707       IL.getPointer(), // &isLastIter
3708       LB.getPointer(), // &Lower
3709       UB.getPointer(), // &Upper
3710       ST.getPointer()  // &Stride
3711   };
3712   llvm::Value *Call =
3713       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3714   return CGF.EmitScalarConversion(
3715       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3716       CGF.getContext().BoolTy, Loc);
3717 }
3718 
3719 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3720                                            llvm::Value *NumThreads,
3721                                            SourceLocation Loc) {
3722   if (!CGF.HaveInsertPoint())
3723     return;
3724   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3725   llvm::Value *Args[] = {
3726       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3727       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3728   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3729                       Args);
3730 }
3731 
3732 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3733                                          OpenMPProcBindClauseKind ProcBind,
3734                                          SourceLocation Loc) {
3735   if (!CGF.HaveInsertPoint())
3736     return;
3737   // Constants for proc bind value accepted by the runtime.
3738   enum ProcBindTy {
3739     ProcBindFalse = 0,
3740     ProcBindTrue,
3741     ProcBindMaster,
3742     ProcBindClose,
3743     ProcBindSpread,
3744     ProcBindIntel,
3745     ProcBindDefault
3746   } RuntimeProcBind;
3747   switch (ProcBind) {
3748   case OMPC_PROC_BIND_master:
3749     RuntimeProcBind = ProcBindMaster;
3750     break;
3751   case OMPC_PROC_BIND_close:
3752     RuntimeProcBind = ProcBindClose;
3753     break;
3754   case OMPC_PROC_BIND_spread:
3755     RuntimeProcBind = ProcBindSpread;
3756     break;
3757   case OMPC_PROC_BIND_unknown:
3758     llvm_unreachable("Unsupported proc_bind value.");
3759   }
3760   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3761   llvm::Value *Args[] = {
3762       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3763       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3764   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3765 }
3766 
3767 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3768                                 SourceLocation Loc) {
3769   if (!CGF.HaveInsertPoint())
3770     return;
3771   // Build call void __kmpc_flush(ident_t *loc)
3772   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3773                       emitUpdateLocation(CGF, Loc));
3774 }
3775 
namespace {
/// Field indexes of the kmp_task_t record, in declaration order.
enum KmpTaskTFields {
  /// Shared variables list.
  KmpTaskTShareds = 0,
  /// Entry point (routine) of the task.
  KmpTaskTRoutine,
  /// Part id, used for untied tasks.
  KmpTaskTPartId,
  /// Function that calls the destructors of the private variables.
  Data1,
  /// Priority of the task.
  Data2,
  /// Lower bound (taskloops only).
  KmpTaskTLowerBound,
  /// Upper bound (taskloops only).
  KmpTaskTUpperBound,
  /// Stride (taskloops only).
  KmpTaskTStride,
  /// "Is last iteration" flag (taskloops only).
  KmpTaskTLastIter,
  /// Reduction data (taskloops only).
  KmpTaskTReductions,
};
} // anonymous namespace
3801 
3802 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3803   return OffloadEntriesTargetRegion.empty() &&
3804          OffloadEntriesDeviceGlobalVar.empty();
3805 }
3806 
3807 /// Initialize target region entry.
3808 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3809     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3810                                     StringRef ParentName, unsigned LineNum,
3811                                     unsigned Order) {
3812   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3813                                              "only required for the device "
3814                                              "code generation.");
3815   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3816       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3817                                    OMPTargetRegionEntryTargetRegion);
3818   ++OffloadingEntriesNum;
3819 }
3820 
3821 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3822     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3823                                   StringRef ParentName, unsigned LineNum,
3824                                   llvm::Constant *Addr, llvm::Constant *ID,
3825                                   OMPTargetRegionEntryKind Flags) {
3826   // If we are emitting code for a target, the entry is already initialized,
3827   // only has to be registered.
3828   if (CGM.getLangOpts().OpenMPIsDevice) {
3829     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3830       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3831           DiagnosticsEngine::Error,
3832           "Unable to find target region on line '%0' in the device code.");
3833       CGM.getDiags().Report(DiagID) << LineNum;
3834       return;
3835     }
3836     auto &Entry =
3837         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3838     assert(Entry.isValid() && "Entry not initialized!");
3839     Entry.setAddress(Addr);
3840     Entry.setID(ID);
3841     Entry.setFlags(Flags);
3842   } else {
3843     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3844     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3845     ++OffloadingEntriesNum;
3846   }
3847 }
3848 
3849 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3850     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3851     unsigned LineNum) const {
3852   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3853   if (PerDevice == OffloadEntriesTargetRegion.end())
3854     return false;
3855   auto PerFile = PerDevice->second.find(FileID);
3856   if (PerFile == PerDevice->second.end())
3857     return false;
3858   auto PerParentName = PerFile->second.find(ParentName);
3859   if (PerParentName == PerFile->second.end())
3860     return false;
3861   auto PerLine = PerParentName->second.find(LineNum);
3862   if (PerLine == PerParentName->second.end())
3863     return false;
3864   // Fail if this entry is already registered.
3865   if (PerLine->second.getAddress() || PerLine->second.getID())
3866     return false;
3867   return true;
3868 }
3869 
3870 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3871     const OffloadTargetRegionEntryInfoActTy &Action) {
3872   // Scan all target region entries and perform the provided action.
3873   for (const auto &D : OffloadEntriesTargetRegion)
3874     for (const auto &F : D.second)
3875       for (const auto &P : F.second)
3876         for (const auto &L : P.second)
3877           Action(D.first, F.first, P.first(), L.first, L.second);
3878 }
3879 
3880 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3881     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3882                                        OMPTargetGlobalVarEntryKind Flags,
3883                                        unsigned Order) {
3884   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3885                                              "only required for the device "
3886                                              "code generation.");
3887   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3888   ++OffloadingEntriesNum;
3889 }
3890 
3891 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3892     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3893                                      CharUnits VarSize,
3894                                      OMPTargetGlobalVarEntryKind Flags,
3895                                      llvm::GlobalValue::LinkageTypes Linkage) {
3896   if (CGM.getLangOpts().OpenMPIsDevice) {
3897     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3898     assert(Entry.isValid() && Entry.getFlags() == Flags &&
3899            "Entry not initialized!");
3900     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3901            "Resetting with the new address.");
3902     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3903       if (Entry.getVarSize().isZero()) {
3904         Entry.setVarSize(VarSize);
3905         Entry.setLinkage(Linkage);
3906       }
3907       return;
3908     }
3909     Entry.setVarSize(VarSize);
3910     Entry.setLinkage(Linkage);
3911     Entry.setAddress(Addr);
3912   } else {
3913     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3914       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3915       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3916              "Entry not initialized!");
3917       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3918              "Resetting with the new address.");
3919       if (Entry.getVarSize().isZero()) {
3920         Entry.setVarSize(VarSize);
3921         Entry.setLinkage(Linkage);
3922       }
3923       return;
3924     }
3925     OffloadEntriesDeviceGlobalVar.try_emplace(
3926         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3927     ++OffloadingEntriesNum;
3928   }
3929 }
3930 
3931 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3932     actOnDeviceGlobalVarEntriesInfo(
3933         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3934   // Scan all target region entries and perform the provided action.
3935   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3936     Action(E.getKey(), E.getValue());
3937 }
3938 
3939 llvm::Function *
3940 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
3941   // If we don't have entries or if we are emitting code for the device, we
3942   // don't need to do anything.
3943   if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
3944     return nullptr;
3945 
3946   llvm::Module &M = CGM.getModule();
3947   ASTContext &C = CGM.getContext();
3948 
3949   // Get list of devices we care about
3950   const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;
3951 
3952   // We should be creating an offloading descriptor only if there are devices
3953   // specified.
3954   assert(!Devices.empty() && "No OpenMP offloading devices??");
3955 
3956   // Create the external variables that will point to the begin and end of the
3957   // host entries section. These will be defined by the linker.
3958   llvm::Type *OffloadEntryTy =
3959       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
3960   std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
3961   auto *HostEntriesBegin = new llvm::GlobalVariable(
3962       M, OffloadEntryTy, /*isConstant=*/true,
3963       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3964       EntriesBeginName);
3965   std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
3966   auto *HostEntriesEnd =
3967       new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
3968                                llvm::GlobalValue::ExternalLinkage,
3969                                /*Initializer=*/nullptr, EntriesEndName);
3970 
3971   // Create all device images
3972   auto *DeviceImageTy = cast<llvm::StructType>(
3973       CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
3974   ConstantInitBuilder DeviceImagesBuilder(CGM);
3975   ConstantArrayBuilder DeviceImagesEntries =
3976       DeviceImagesBuilder.beginArray(DeviceImageTy);
3977 
3978   for (const llvm::Triple &Device : Devices) {
3979     StringRef T = Device.getTriple();
3980     std::string BeginName = getName({"omp_offloading", "img_start", ""});
3981     auto *ImgBegin = new llvm::GlobalVariable(
3982         M, CGM.Int8Ty, /*isConstant=*/true,
3983         llvm::GlobalValue::ExternalWeakLinkage,
3984         /*Initializer=*/nullptr, Twine(BeginName).concat(T));
3985     std::string EndName = getName({"omp_offloading", "img_end", ""});
3986     auto *ImgEnd = new llvm::GlobalVariable(
3987         M, CGM.Int8Ty, /*isConstant=*/true,
3988         llvm::GlobalValue::ExternalWeakLinkage,
3989         /*Initializer=*/nullptr, Twine(EndName).concat(T));
3990 
3991     llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
3992                               HostEntriesEnd};
3993     createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
3994                                              DeviceImagesEntries);
3995   }
3996 
3997   // Create device images global array.
3998   std::string ImagesName = getName({"omp_offloading", "device_images"});
3999   llvm::GlobalVariable *DeviceImages =
4000       DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
4001                                                 CGM.getPointerAlign(),
4002                                                 /*isConstant=*/true);
4003   DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4004 
4005   // This is a Zero array to be used in the creation of the constant expressions
4006   llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
4007                              llvm::Constant::getNullValue(CGM.Int32Ty)};
4008 
4009   // Create the target region descriptor.
4010   llvm::Constant *Data[] = {
4011       llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
4012       llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
4013                                            DeviceImages, Index),
4014       HostEntriesBegin, HostEntriesEnd};
4015   std::string Descriptor = getName({"omp_offloading", "descriptor"});
4016   llvm::GlobalVariable *Desc = createGlobalStruct(
4017       CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);
4018 
4019   // Emit code to register or unregister the descriptor at execution
4020   // startup or closing, respectively.
4021 
4022   llvm::Function *UnRegFn;
4023   {
4024     FunctionArgList Args;
4025     ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
4026     Args.push_back(&DummyPtr);
4027 
4028     CodeGenFunction CGF(CGM);
4029     // Disable debug info for global (de-)initializer because they are not part
4030     // of some particular construct.
4031     CGF.disableDebugInfo();
4032     const auto &FI =
4033         CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4034     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
4035     std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
4036     UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
4037     CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
4038     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
4039                         Desc);
4040     CGF.FinishFunction();
4041   }
4042   llvm::Function *RegFn;
4043   {
4044     CodeGenFunction CGF(CGM);
4045     // Disable debug info for global (de-)initializer because they are not part
4046     // of some particular construct.
4047     CGF.disableDebugInfo();
4048     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
4049     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
4050 
4051     // Encode offload target triples into the registration function name. It
4052     // will serve as a comdat key for the registration/unregistration code for
4053     // this particular combination of offloading targets.
4054     SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
4055     RegFnNameParts[0] = "omp_offloading";
4056     RegFnNameParts[1] = "descriptor_reg";
4057     llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
4058                     [](const llvm::Triple &T) -> const std::string& {
4059                       return T.getTriple();
4060                     });
4061     llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
4062     std::string Descriptor = getName(RegFnNameParts);
4063     RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
4064     CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
4065     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
4066     // Create a variable to drive the registration and unregistration of the
4067     // descriptor, so we can reuse the logic that emits Ctors and Dtors.
4068     ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
4069                                   SourceLocation(), nullptr, C.CharTy,
4070                                   ImplicitParamDecl::Other);
4071     CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
4072     CGF.FinishFunction();
4073   }
4074   if (CGM.supportsCOMDAT()) {
4075     // It is sufficient to call registration function only once, so create a
4076     // COMDAT group for registration/unregistration functions and associated
4077     // data. That would reduce startup time and code size. Registration
4078     // function serves as a COMDAT group key.
4079     llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
4080     RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
4081     RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
4082     RegFn->setComdat(ComdatKey);
4083     UnRegFn->setComdat(ComdatKey);
4084     DeviceImages->setComdat(ComdatKey);
4085     Desc->setComdat(ComdatKey);
4086   }
4087   return RegFn;
4088 }
4089 
4090 void CGOpenMPRuntime::createOffloadEntry(
4091     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4092     llvm::GlobalValue::LinkageTypes Linkage) {
4093   StringRef Name = Addr->getName();
4094   llvm::Module &M = CGM.getModule();
4095   llvm::LLVMContext &C = M.getContext();
4096 
4097   // Create constant string with the name.
4098   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4099 
4100   std::string StringName = getName({"omp_offloading", "entry_name"});
4101   auto *Str = new llvm::GlobalVariable(
4102       M, StrPtrInit->getType(), /*isConstant=*/true,
4103       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4104   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4105 
4106   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4107                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4108                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4109                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4110                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4111   std::string EntryName = getName({"omp_offloading", "entry", ""});
4112   llvm::GlobalVariable *Entry = createGlobalStruct(
4113       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4114       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4115 
4116   // The entry has to be created in the section the linker expects it to be.
4117   std::string Section = getName({"omp_offloading", "entries"});
4118   Entry->setSection(Section);
4119 }
4120 
4121 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
4122   // Emit the offloading entries and metadata so that the device codegen side
4123   // can easily figure out what to emit. The produced metadata looks like
4124   // this:
4125   //
4126   // !omp_offload.info = !{!1, ...}
4127   //
4128   // Right now we only generate metadata for function that contain target
4129   // regions.
4130 
4131   // If we do not have entries, we don't need to do anything.
4132   if (OffloadEntriesInfoManager.empty())
4133     return;
4134 
4135   llvm::Module &M = CGM.getModule();
4136   llvm::LLVMContext &C = M.getContext();
4137   SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
4138       OrderedEntries(OffloadEntriesInfoManager.size());
4139   llvm::SmallVector<StringRef, 16> ParentFunctions(
4140       OffloadEntriesInfoManager.size());
4141 
4142   // Auxiliary methods to create metadata values and strings.
4143   auto &&GetMDInt = [this](unsigned V) {
4144     return llvm::ConstantAsMetadata::get(
4145         llvm::ConstantInt::get(CGM.Int32Ty, V));
4146   };
4147 
4148   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
4149 
4150   // Create the offloading info metadata node.
4151   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
4152 
4153   // Create function that emits metadata for each target region entry;
4154   auto &&TargetRegionMetadataEmitter =
4155       [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
4156           unsigned DeviceID, unsigned FileID, StringRef ParentName,
4157           unsigned Line,
4158           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
4159         // Generate metadata for target regions. Each entry of this metadata
4160         // contains:
4161         // - Entry 0 -> Kind of this type of metadata (0).
4162         // - Entry 1 -> Device ID of the file where the entry was identified.
4163         // - Entry 2 -> File ID of the file where the entry was identified.
4164         // - Entry 3 -> Mangled name of the function where the entry was
4165         // identified.
4166         // - Entry 4 -> Line in the file where the entry was identified.
4167         // - Entry 5 -> Order the entry was created.
4168         // The first element of the metadata node is the kind.
4169         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
4170                                  GetMDInt(FileID),      GetMDString(ParentName),
4171                                  GetMDInt(Line),        GetMDInt(E.getOrder())};
4172 
4173         // Save this entry in the right position of the ordered entries array.
4174         OrderedEntries[E.getOrder()] = &E;
4175         ParentFunctions[E.getOrder()] = ParentName;
4176 
4177         // Add metadata to the named metadata node.
4178         MD->addOperand(llvm::MDNode::get(C, Ops));
4179       };
4180 
4181   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
4182       TargetRegionMetadataEmitter);
4183 
4184   // Create function that emits metadata for each device global variable entry;
4185   auto &&DeviceGlobalVarMetadataEmitter =
4186       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
4187        MD](StringRef MangledName,
4188            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
4189                &E) {
4190         // Generate metadata for global variables. Each entry of this metadata
4191         // contains:
4192         // - Entry 0 -> Kind of this type of metadata (1).
4193         // - Entry 1 -> Mangled name of the variable.
4194         // - Entry 2 -> Declare target kind.
4195         // - Entry 3 -> Order the entry was created.
4196         // The first element of the metadata node is the kind.
4197         llvm::Metadata *Ops[] = {
4198             GetMDInt(E.getKind()), GetMDString(MangledName),
4199             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
4200 
4201         // Save this entry in the right position of the ordered entries array.
4202         OrderedEntries[E.getOrder()] = &E;
4203 
4204         // Add metadata to the named metadata node.
4205         MD->addOperand(llvm::MDNode::get(C, Ops));
4206       };
4207 
4208   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
4209       DeviceGlobalVarMetadataEmitter);
4210 
4211   for (const auto *E : OrderedEntries) {
4212     assert(E && "All ordered entries must exist!");
4213     if (const auto *CE =
4214             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
4215                 E)) {
4216       if (!CE->getID() || !CE->getAddress()) {
4217         // Do not blame the entry if the parent funtion is not emitted.
4218         StringRef FnName = ParentFunctions[CE->getOrder()];
4219         if (!CGM.GetGlobalValue(FnName))
4220           continue;
4221         unsigned DiagID = CGM.getDiags().getCustomDiagID(
4222             DiagnosticsEngine::Error,
4223             "Offloading entry for target region is incorrect: either the "
4224             "address or the ID is invalid.");
4225         CGM.getDiags().Report(DiagID);
4226         continue;
4227       }
4228       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
4229                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
4230     } else if (const auto *CE =
4231                    dyn_cast<OffloadEntriesInfoManagerTy::
4232                                 OffloadEntryInfoDeviceGlobalVar>(E)) {
4233       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
4234           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4235               CE->getFlags());
4236       switch (Flags) {
4237       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
4238         if (CGM.getLangOpts().OpenMPIsDevice &&
4239             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
4240           continue;
4241         if (!CE->getAddress()) {
4242           unsigned DiagID = CGM.getDiags().getCustomDiagID(
4243               DiagnosticsEngine::Error,
4244               "Offloading entry for declare target variable is incorrect: the "
4245               "address is invalid.");
4246           CGM.getDiags().Report(DiagID);
4247           continue;
4248         }
4249         // The vaiable has no definition - no need to add the entry.
4250         if (CE->getVarSize().isZero())
4251           continue;
4252         break;
4253       }
4254       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
4255         assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
4256                 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
4257                "Declaret target link address is set.");
4258         if (CGM.getLangOpts().OpenMPIsDevice)
4259           continue;
4260         if (!CE->getAddress()) {
4261           unsigned DiagID = CGM.getDiags().getCustomDiagID(
4262               DiagnosticsEngine::Error,
4263               "Offloading entry for declare target variable is incorrect: the "
4264               "address is invalid.");
4265           CGM.getDiags().Report(DiagID);
4266           continue;
4267         }
4268         break;
4269       }
4270       createOffloadEntry(CE->getAddress(), CE->getAddress(),
4271                          CE->getVarSize().getQuantity(), Flags,
4272                          CE->getLinkage());
4273     } else {
4274       llvm_unreachable("Unsupported entry kind.");
4275     }
4276   }
4277 }
4278 
4279 /// Loads all the offload entries information from the host IR
4280 /// metadata.
4281 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4282   // If we are in target mode, load the metadata from the host IR. This code has
4283   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
4284 
4285   if (!CGM.getLangOpts().OpenMPIsDevice)
4286     return;
4287 
4288   if (CGM.getLangOpts().OMPHostIRFile.empty())
4289     return;
4290 
4291   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4292   if (auto EC = Buf.getError()) {
4293     CGM.getDiags().Report(diag::err_cannot_open_file)
4294         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4295     return;
4296   }
4297 
4298   llvm::LLVMContext C;
4299   auto ME = expectedToErrorOrAndEmitErrors(
4300       C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4301 
4302   if (auto EC = ME.getError()) {
4303     unsigned DiagID = CGM.getDiags().getCustomDiagID(
4304         DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4305     CGM.getDiags().Report(DiagID)
4306         << CGM.getLangOpts().OMPHostIRFile << EC.message();
4307     return;
4308   }
4309 
4310   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4311   if (!MD)
4312     return;
4313 
4314   for (llvm::MDNode *MN : MD->operands()) {
4315     auto &&GetMDInt = [MN](unsigned Idx) {
4316       auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4317       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4318     };
4319 
4320     auto &&GetMDString = [MN](unsigned Idx) {
4321       auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4322       return V->getString();
4323     };
4324 
4325     switch (GetMDInt(0)) {
4326     default:
4327       llvm_unreachable("Unexpected metadata!");
4328       break;
4329     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4330         OffloadingEntryInfoTargetRegion:
4331       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4332           /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4333           /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4334           /*Order=*/GetMDInt(5));
4335       break;
4336     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4337         OffloadingEntryInfoDeviceGlobalVar:
4338       OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4339           /*MangledName=*/GetMDString(1),
4340           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4341               /*Flags=*/GetMDInt(2)),
4342           /*Order=*/GetMDInt(3));
4343       break;
4344     }
4345   }
4346 }
4347 
4348 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4349   if (!KmpRoutineEntryPtrTy) {
4350     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4351     ASTContext &C = CGM.getContext();
4352     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4353     FunctionProtoType::ExtProtoInfo EPI;
4354     KmpRoutineEntryPtrQTy = C.getPointerType(
4355         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4356     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4357   }
4358 }
4359 
4360 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4361   // Make sure the type of the entry is already created. This is the type we
4362   // have to create:
4363   // struct __tgt_offload_entry{
4364   //   void      *addr;       // Pointer to the offload entry info.
4365   //                          // (function or global)
4366   //   char      *name;       // Name of the function or global.
4367   //   size_t     size;       // Size of the entry info (0 if it a function).
4368   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4369   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4370   // };
4371   if (TgtOffloadEntryQTy.isNull()) {
4372     ASTContext &C = CGM.getContext();
4373     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4374     RD->startDefinition();
4375     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4376     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4377     addFieldToRecordDecl(C, RD, C.getSizeType());
4378     addFieldToRecordDecl(
4379         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4380     addFieldToRecordDecl(
4381         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4382     RD->completeDefinition();
4383     RD->addAttr(PackedAttr::CreateImplicit(C));
4384     TgtOffloadEntryQTy = C.getRecordType(RD);
4385   }
4386   return TgtOffloadEntryQTy;
4387 }
4388 
4389 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4390   // These are the types we need to build:
4391   // struct __tgt_device_image{
4392   // void   *ImageStart;       // Pointer to the target code start.
4393   // void   *ImageEnd;         // Pointer to the target code end.
4394   // // We also add the host entries to the device image, as it may be useful
4395   // // for the target runtime to have access to that information.
4396   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
4397   //                                       // the entries.
4398   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4399   //                                       // entries (non inclusive).
4400   // };
4401   if (TgtDeviceImageQTy.isNull()) {
4402     ASTContext &C = CGM.getContext();
4403     RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4404     RD->startDefinition();
4405     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4406     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4407     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4408     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4409     RD->completeDefinition();
4410     TgtDeviceImageQTy = C.getRecordType(RD);
4411   }
4412   return TgtDeviceImageQTy;
4413 }
4414 
4415 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4416   // struct __tgt_bin_desc{
4417   //   int32_t              NumDevices;      // Number of devices supported.
4418   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
4419   //                                         // (one per device).
4420   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
4421   //                                         // entries.
4422   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4423   //                                         // entries (non inclusive).
4424   // };
4425   if (TgtBinaryDescriptorQTy.isNull()) {
4426     ASTContext &C = CGM.getContext();
4427     RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4428     RD->startDefinition();
4429     addFieldToRecordDecl(
4430         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4431     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
4432     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4433     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4434     RD->completeDefinition();
4435     TgtBinaryDescriptorQTy = C.getRecordType(RD);
4436   }
4437   return TgtBinaryDescriptorQTy;
4438 }
4439 
4440 namespace {
4441 struct PrivateHelpersTy {
4442   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4443                    const VarDecl *PrivateElemInit)
4444       : Original(Original), PrivateCopy(PrivateCopy),
4445         PrivateElemInit(PrivateElemInit) {}
4446   const VarDecl *Original;
4447   const VarDecl *PrivateCopy;
4448   const VarDecl *PrivateElemInit;
4449 };
4450 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4451 } // anonymous namespace
4452 
4453 static RecordDecl *
4454 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4455   if (!Privates.empty()) {
4456     ASTContext &C = CGM.getContext();
4457     // Build struct .kmp_privates_t. {
4458     //         /*  private vars  */
4459     //       };
4460     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4461     RD->startDefinition();
4462     for (const auto &Pair : Privates) {
4463       const VarDecl *VD = Pair.second.Original;
4464       QualType Type = VD->getType().getNonReferenceType();
4465       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4466       if (VD->hasAttrs()) {
4467         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4468              E(VD->getAttrs().end());
4469              I != E; ++I)
4470           FD->addAttr(*I);
4471       }
4472     }
4473     RD->completeDefinition();
4474     return RD;
4475   }
4476   return nullptr;
4477 }
4478 
4479 static RecordDecl *
4480 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4481                          QualType KmpInt32Ty,
4482                          QualType KmpRoutineEntryPointerQTy) {
4483   ASTContext &C = CGM.getContext();
4484   // Build struct kmp_task_t {
4485   //         void *              shareds;
4486   //         kmp_routine_entry_t routine;
4487   //         kmp_int32           part_id;
4488   //         kmp_cmplrdata_t data1;
4489   //         kmp_cmplrdata_t data2;
4490   // For taskloops additional fields:
4491   //         kmp_uint64          lb;
4492   //         kmp_uint64          ub;
4493   //         kmp_int64           st;
4494   //         kmp_int32           liter;
4495   //         void *              reductions;
4496   //       };
4497   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4498   UD->startDefinition();
4499   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4500   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4501   UD->completeDefinition();
4502   QualType KmpCmplrdataTy = C.getRecordType(UD);
4503   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4504   RD->startDefinition();
4505   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4506   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4507   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4508   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4509   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4510   if (isOpenMPTaskLoopDirective(Kind)) {
4511     QualType KmpUInt64Ty =
4512         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4513     QualType KmpInt64Ty =
4514         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4515     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4516     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4517     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4518     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4519     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4520   }
4521   RD->completeDefinition();
4522   return RD;
4523 }
4524 
4525 static RecordDecl *
4526 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4527                                      ArrayRef<PrivateDataTy> Privates) {
4528   ASTContext &C = CGM.getContext();
4529   // Build struct kmp_task_t_with_privates {
4530   //         kmp_task_t task_data;
4531   //         .kmp_privates_t. privates;
4532   //       };
4533   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4534   RD->startDefinition();
4535   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4536   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4537     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4538   RD->completeDefinition();
4539   return RD;
4540 }
4541 
4542 /// Emit a proxy function which accepts kmp_task_t as the second
4543 /// argument.
4544 /// \code
4545 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
4546 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
4547 ///   For taskloops:
4548 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4549 ///   tt->reductions, tt->shareds);
4550 ///   return 0;
4551 /// }
4552 /// \endcode
4553 static llvm::Function *
4554 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
4555                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
4556                       QualType KmpTaskTWithPrivatesPtrQTy,
4557                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
4558                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
4559                       llvm::Value *TaskPrivatesMap) {
4560   ASTContext &C = CGM.getContext();
4561   FunctionArgList Args;
4562   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4563                             ImplicitParamDecl::Other);
4564   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4565                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4566                                 ImplicitParamDecl::Other);
4567   Args.push_back(&GtidArg);
4568   Args.push_back(&TaskTypeArg);
4569   const auto &TaskEntryFnInfo =
4570       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4571   llvm::FunctionType *TaskEntryTy =
4572       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
4573   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
4574   auto *TaskEntry = llvm::Function::Create(
4575       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4576   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
4577   TaskEntry->setDoesNotRecurse();
4578   CodeGenFunction CGF(CGM);
4579   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
4580                     Loc, Loc);
4581 
4582   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
4583   // tt,
4584   // For taskloops:
4585   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4586   // tt->task_data.shareds);
4587   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
4588       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
4589   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4590       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4591       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4592   const auto *KmpTaskTWithPrivatesQTyRD =
4593       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4594   LValue Base =
4595       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4596   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4597   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4598   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
4599   llvm::Value *PartidParam = PartIdLVal.getPointer();
4600 
4601   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
4602   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
4603   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4604       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
4605       CGF.ConvertTypeForMem(SharedsPtrTy));
4606 
4607   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4608   llvm::Value *PrivatesParam;
4609   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
4610     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
4611     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4612         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
4613   } else {
4614     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4615   }
4616 
4617   llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
4618                                TaskPrivatesMap,
4619                                CGF.Builder
4620                                    .CreatePointerBitCastOrAddrSpaceCast(
4621                                        TDBase.getAddress(), CGF.VoidPtrTy)
4622                                    .getPointer()};
4623   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
4624                                           std::end(CommonArgs));
4625   if (isOpenMPTaskLoopDirective(Kind)) {
4626     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
4627     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
4628     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
4629     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
4630     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
4631     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
4632     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
4633     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
4634     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
4635     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4636     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4637     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
4638     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
4639     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
4640     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
4641     CallArgs.push_back(LBParam);
4642     CallArgs.push_back(UBParam);
4643     CallArgs.push_back(StParam);
4644     CallArgs.push_back(LIParam);
4645     CallArgs.push_back(RParam);
4646   }
4647   CallArgs.push_back(SharedsParam);
4648 
4649   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
4650                                                   CallArgs);
4651   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
4652                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
4653   CGF.FinishFunction();
4654   return TaskEntry;
4655 }
4656 
4657 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4658                                             SourceLocation Loc,
4659                                             QualType KmpInt32Ty,
4660                                             QualType KmpTaskTWithPrivatesPtrQTy,
4661                                             QualType KmpTaskTWithPrivatesQTy) {
4662   ASTContext &C = CGM.getContext();
4663   FunctionArgList Args;
4664   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4665                             ImplicitParamDecl::Other);
4666   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4667                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4668                                 ImplicitParamDecl::Other);
4669   Args.push_back(&GtidArg);
4670   Args.push_back(&TaskTypeArg);
4671   const auto &DestructorFnInfo =
4672       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4673   llvm::FunctionType *DestructorFnTy =
4674       CGM.getTypes().GetFunctionType(DestructorFnInfo);
4675   std::string Name =
4676       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4677   auto *DestructorFn =
4678       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4679                              Name, &CGM.getModule());
4680   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4681                                     DestructorFnInfo);
4682   DestructorFn->setDoesNotRecurse();
4683   CodeGenFunction CGF(CGM);
4684   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4685                     Args, Loc, Loc);
4686 
4687   LValue Base = CGF.EmitLoadOfPointerLValue(
4688       CGF.GetAddrOfLocalVar(&TaskTypeArg),
4689       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4690   const auto *KmpTaskTWithPrivatesQTyRD =
4691       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4692   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4693   Base = CGF.EmitLValueForField(Base, *FI);
4694   for (const auto *Field :
4695        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4696     if (QualType::DestructionKind DtorKind =
4697             Field->getType().isDestructedType()) {
4698       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4699       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
4700     }
4701   }
4702   CGF.FinishFunction();
4703   return DestructorFn;
4704 }
4705 
4706 /// Emit a privates mapping function for correct handling of private and
4707 /// firstprivate variables.
4708 /// \code
4709 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4710 /// **noalias priv1,...,  <tyn> **noalias privn) {
4711 ///   *priv1 = &.privates.priv1;
4712 ///   ...;
4713 ///   *privn = &.privates.privn;
4714 /// }
4715 /// \endcode
4716 static llvm::Value *
4717 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4718                                ArrayRef<const Expr *> PrivateVars,
4719                                ArrayRef<const Expr *> FirstprivateVars,
4720                                ArrayRef<const Expr *> LastprivateVars,
4721                                QualType PrivatesQTy,
4722                                ArrayRef<PrivateDataTy> Privates) {
4723   ASTContext &C = CGM.getContext();
4724   FunctionArgList Args;
4725   ImplicitParamDecl TaskPrivatesArg(
4726       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4727       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4728       ImplicitParamDecl::Other);
4729   Args.push_back(&TaskPrivatesArg);
4730   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4731   unsigned Counter = 1;
4732   for (const Expr *E : PrivateVars) {
4733     Args.push_back(ImplicitParamDecl::Create(
4734         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4735         C.getPointerType(C.getPointerType(E->getType()))
4736             .withConst()
4737             .withRestrict(),
4738         ImplicitParamDecl::Other));
4739     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4740     PrivateVarsPos[VD] = Counter;
4741     ++Counter;
4742   }
4743   for (const Expr *E : FirstprivateVars) {
4744     Args.push_back(ImplicitParamDecl::Create(
4745         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4746         C.getPointerType(C.getPointerType(E->getType()))
4747             .withConst()
4748             .withRestrict(),
4749         ImplicitParamDecl::Other));
4750     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4751     PrivateVarsPos[VD] = Counter;
4752     ++Counter;
4753   }
4754   for (const Expr *E : LastprivateVars) {
4755     Args.push_back(ImplicitParamDecl::Create(
4756         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4757         C.getPointerType(C.getPointerType(E->getType()))
4758             .withConst()
4759             .withRestrict(),
4760         ImplicitParamDecl::Other));
4761     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4762     PrivateVarsPos[VD] = Counter;
4763     ++Counter;
4764   }
4765   const auto &TaskPrivatesMapFnInfo =
4766       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4767   llvm::FunctionType *TaskPrivatesMapTy =
4768       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4769   std::string Name =
4770       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4771   auto *TaskPrivatesMap = llvm::Function::Create(
4772       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4773       &CGM.getModule());
4774   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4775                                     TaskPrivatesMapFnInfo);
4776   if (CGM.getLangOpts().Optimize) {
4777     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4778     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4779     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4780   }
4781   CodeGenFunction CGF(CGM);
4782   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4783                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4784 
4785   // *privi = &.privates.privi;
4786   LValue Base = CGF.EmitLoadOfPointerLValue(
4787       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4788       TaskPrivatesArg.getType()->castAs<PointerType>());
4789   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4790   Counter = 0;
4791   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4792     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4793     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4794     LValue RefLVal =
4795         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4796     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4797         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4798     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4799     ++Counter;
4800   }
4801   CGF.FinishFunction();
4802   return TaskPrivatesMap;
4803 }
4804 
4805 /// Emit initialization for private variables in task-based directives.
4806 static void emitPrivatesInit(CodeGenFunction &CGF,
4807                              const OMPExecutableDirective &D,
4808                              Address KmpTaskSharedsPtr, LValue TDBase,
4809                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4810                              QualType SharedsTy, QualType SharedsPtrTy,
4811                              const OMPTaskDataTy &Data,
4812                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4813   ASTContext &C = CGF.getContext();
4814   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4815   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4816   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
4817                                  ? OMPD_taskloop
4818                                  : OMPD_task;
4819   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
4820   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
4821   LValue SrcBase;
4822   bool IsTargetTask =
4823       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
4824       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
4825   // For target-based directives skip 3 firstprivate arrays BasePointersArray,
4826   // PointersArray and SizesArray. The original variables for these arrays are
4827   // not captured and we get their addresses explicitly.
4828   if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
4829       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
4830     SrcBase = CGF.MakeAddrLValue(
4831         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4832             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4833         SharedsTy);
4834   }
4835   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4836   for (const PrivateDataTy &Pair : Privates) {
4837     const VarDecl *VD = Pair.second.PrivateCopy;
4838     const Expr *Init = VD->getAnyInitializer();
4839     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4840                              !CGF.isTrivialInitializer(Init)))) {
4841       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
4842       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
4843         const VarDecl *OriginalVD = Pair.second.Original;
4844         // Check if the variable is the target-based BasePointersArray,
4845         // PointersArray or SizesArray.
4846         LValue SharedRefLValue;
4847         QualType Type = PrivateLValue.getType();
4848         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
4849         if (IsTargetTask && !SharedField) {
4850           assert(isa<ImplicitParamDecl>(OriginalVD) &&
4851                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
4852                  cast<CapturedDecl>(OriginalVD->getDeclContext())
4853                          ->getNumParams() == 0 &&
4854                  isa<TranslationUnitDecl>(
4855                      cast<CapturedDecl>(OriginalVD->getDeclContext())
4856                          ->getDeclContext()) &&
4857                  "Expected artificial target data variable.");
4858           SharedRefLValue =
4859               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
4860         } else {
4861           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4862           SharedRefLValue = CGF.MakeAddrLValue(
4863               Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
4864               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
4865               SharedRefLValue.getTBAAInfo());
4866         }
4867         if (Type->isArrayType()) {
4868           // Initialize firstprivate array.
4869           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4870             // Perform simple memcpy.
4871             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
4872           } else {
4873             // Initialize firstprivate array using element-by-element
4874             // initialization.
4875             CGF.EmitOMPAggregateAssign(
4876                 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
4877                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4878                                                   Address SrcElement) {
4879                   // Clean up any temporaries needed by the initialization.
4880                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
4881                   InitScope.addPrivate(
4882                       Elem, [SrcElement]() -> Address { return SrcElement; });
4883                   (void)InitScope.Privatize();
4884                   // Emit initialization for single element.
4885                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
4886                       CGF, &CapturesInfo);
4887                   CGF.EmitAnyExprToMem(Init, DestElement,
4888                                        Init->getType().getQualifiers(),
4889                                        /*IsInitializer=*/false);
4890                 });
4891           }
4892         } else {
4893           CodeGenFunction::OMPPrivateScope InitScope(CGF);
4894           InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
4895             return SharedRefLValue.getAddress();
4896           });
4897           (void)InitScope.Privatize();
4898           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4899           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4900                              /*capturedByInit=*/false);
4901         }
4902       } else {
4903         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4904       }
4905     }
4906     ++FI;
4907   }
4908 }
4909 
4910 /// Check if duplication function is required for taskloops.
4911 static bool checkInitIsRequired(CodeGenFunction &CGF,
4912                                 ArrayRef<PrivateDataTy> Privates) {
4913   bool InitRequired = false;
4914   for (const PrivateDataTy &Pair : Privates) {
4915     const VarDecl *VD = Pair.second.PrivateCopy;
4916     const Expr *Init = VD->getAnyInitializer();
4917     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4918                                     !CGF.isTrivialInitializer(Init));
4919     if (InitRequired)
4920       break;
4921   }
4922   return InitRequired;
4923 }
4924 
4925 
4926 /// Emit task_dup function (for initialization of
4927 /// private/firstprivate/lastprivate vars and last_iter flag)
4928 /// \code
4929 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4930 /// lastpriv) {
4931 /// // setup lastprivate flag
4932 ///    task_dst->last = lastpriv;
4933 /// // could be constructor calls here...
4934 /// }
4935 /// \endcode
4936 static llvm::Value *
4937 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4938                     const OMPExecutableDirective &D,
4939                     QualType KmpTaskTWithPrivatesPtrQTy,
4940                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4941                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4942                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4943                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4944   ASTContext &C = CGM.getContext();
4945   FunctionArgList Args;
4946   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4947                            KmpTaskTWithPrivatesPtrQTy,
4948                            ImplicitParamDecl::Other);
4949   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4950                            KmpTaskTWithPrivatesPtrQTy,
4951                            ImplicitParamDecl::Other);
4952   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4953                                 ImplicitParamDecl::Other);
4954   Args.push_back(&DstArg);
4955   Args.push_back(&SrcArg);
4956   Args.push_back(&LastprivArg);
4957   const auto &TaskDupFnInfo =
4958       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4959   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4960   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4961   auto *TaskDup = llvm::Function::Create(
4962       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4963   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4964   TaskDup->setDoesNotRecurse();
4965   CodeGenFunction CGF(CGM);
4966   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4967                     Loc);
4968 
4969   LValue TDBase = CGF.EmitLoadOfPointerLValue(
4970       CGF.GetAddrOfLocalVar(&DstArg),
4971       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4972   // task_dst->liter = lastpriv;
4973   if (WithLastIter) {
4974     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4975     LValue Base = CGF.EmitLValueForField(
4976         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4977     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4978     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4979         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4980     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4981   }
4982 
4983   // Emit initial values for private copies (if any).
4984   assert(!Privates.empty());
4985   Address KmpTaskSharedsPtr = Address::invalid();
4986   if (!Data.FirstprivateVars.empty()) {
4987     LValue TDBase = CGF.EmitLoadOfPointerLValue(
4988         CGF.GetAddrOfLocalVar(&SrcArg),
4989         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4990     LValue Base = CGF.EmitLValueForField(
4991         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4992     KmpTaskSharedsPtr = Address(
4993         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4994                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4995                                                   KmpTaskTShareds)),
4996                              Loc),
4997         CGF.getNaturalTypeAlignment(SharedsTy));
4998   }
4999   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
5000                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
5001   CGF.FinishFunction();
5002   return TaskDup;
5003 }
5004 
5005 /// Checks if destructor function is required to be generated.
5006 /// \return true if cleanups are required, false otherwise.
5007 static bool
5008 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
5009   bool NeedsCleanup = false;
5010   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
5011   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
5012   for (const FieldDecl *FD : PrivateRD->fields()) {
5013     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
5014     if (NeedsCleanup)
5015       break;
5016   }
5017   return NeedsCleanup;
5018 }
5019 
5020 CGOpenMPRuntime::TaskResultTy
5021 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
5022                               const OMPExecutableDirective &D,
5023                               llvm::Function *TaskFunction, QualType SharedsTy,
5024                               Address Shareds, const OMPTaskDataTy &Data) {
5025   ASTContext &C = CGM.getContext();
5026   llvm::SmallVector<PrivateDataTy, 4> Privates;
5027   // Aggregate privates and sort them by the alignment.
5028   auto I = Data.PrivateCopies.begin();
5029   for (const Expr *E : Data.PrivateVars) {
5030     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5031     Privates.emplace_back(
5032         C.getDeclAlign(VD),
5033         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
5034                          /*PrivateElemInit=*/nullptr));
5035     ++I;
5036   }
5037   I = Data.FirstprivateCopies.begin();
5038   auto IElemInitRef = Data.FirstprivateInits.begin();
5039   for (const Expr *E : Data.FirstprivateVars) {
5040     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5041     Privates.emplace_back(
5042         C.getDeclAlign(VD),
5043         PrivateHelpersTy(
5044             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
5045             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
5046     ++I;
5047     ++IElemInitRef;
5048   }
5049   I = Data.LastprivateCopies.begin();
5050   for (const Expr *E : Data.LastprivateVars) {
5051     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5052     Privates.emplace_back(
5053         C.getDeclAlign(VD),
5054         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
5055                          /*PrivateElemInit=*/nullptr));
5056     ++I;
5057   }
5058   llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
5059     return L.first > R.first;
5060   });
5061   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
5062   // Build type kmp_routine_entry_t (if not built yet).
5063   emitKmpRoutineEntryT(KmpInt32Ty);
5064   // Build type kmp_task_t (if not built yet).
5065   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
5066     if (SavedKmpTaskloopTQTy.isNull()) {
5067       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
5068           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
5069     }
5070     KmpTaskTQTy = SavedKmpTaskloopTQTy;
5071   } else {
5072     assert((D.getDirectiveKind() == OMPD_task ||
5073             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
5074             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
5075            "Expected taskloop, task or target directive");
5076     if (SavedKmpTaskTQTy.isNull()) {
5077       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
5078           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
5079     }
5080     KmpTaskTQTy = SavedKmpTaskTQTy;
5081   }
5082   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
5083   // Build particular struct kmp_task_t for the given task.
5084   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
5085       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
5086   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
5087   QualType KmpTaskTWithPrivatesPtrQTy =
5088       C.getPointerType(KmpTaskTWithPrivatesQTy);
5089   llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
5090   llvm::Type *KmpTaskTWithPrivatesPtrTy =
5091       KmpTaskTWithPrivatesTy->getPointerTo();
5092   llvm::Value *KmpTaskTWithPrivatesTySize =
5093       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
5094   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
5095 
5096   // Emit initial values for private copies (if any).
5097   llvm::Value *TaskPrivatesMap = nullptr;
5098   llvm::Type *TaskPrivatesMapTy =
5099       std::next(TaskFunction->arg_begin(), 3)->getType();
5100   if (!Privates.empty()) {
5101     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
5102     TaskPrivatesMap = emitTaskPrivateMappingFunction(
5103         CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
5104         FI->getType(), Privates);
5105     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5106         TaskPrivatesMap, TaskPrivatesMapTy);
5107   } else {
5108     TaskPrivatesMap = llvm::ConstantPointerNull::get(
5109         cast<llvm::PointerType>(TaskPrivatesMapTy));
5110   }
5111   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
5112   // kmp_task_t *tt);
5113   llvm::Function *TaskEntry = emitProxyTaskFunction(
5114       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5115       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
5116       TaskPrivatesMap);
5117 
5118   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
5119   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
5120   // kmp_routine_entry_t *task_entry);
5121   // Task flags. Format is taken from
5122   // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
5123   // description of kmp_tasking_flags struct.
5124   enum {
5125     TiedFlag = 0x1,
5126     FinalFlag = 0x2,
5127     DestructorsFlag = 0x8,
5128     PriorityFlag = 0x20
5129   };
5130   unsigned Flags = Data.Tied ? TiedFlag : 0;
5131   bool NeedsCleanup = false;
5132   if (!Privates.empty()) {
5133     NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
5134     if (NeedsCleanup)
5135       Flags = Flags | DestructorsFlag;
5136   }
5137   if (Data.Priority.getInt())
5138     Flags = Flags | PriorityFlag;
5139   llvm::Value *TaskFlags =
5140       Data.Final.getPointer()
5141           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
5142                                      CGF.Builder.getInt32(FinalFlag),
5143                                      CGF.Builder.getInt32(/*C=*/0))
5144           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
5145   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
5146   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
5147   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
5148       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
5149       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5150           TaskEntry, KmpRoutineEntryPtrTy)};
5151   llvm::Value *NewTask;
5152   if (D.hasClausesOfKind<OMPNowaitClause>()) {
5153     // Check if we have any device clause associated with the directive.
5154     const Expr *Device = nullptr;
5155     if (auto *C = D.getSingleClause<OMPDeviceClause>())
5156       Device = C->getDevice();
5157     // Emit device ID if any otherwise use default value.
5158     llvm::Value *DeviceID;
5159     if (Device)
5160       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
5161                                            CGF.Int64Ty, /*isSigned=*/true);
5162     else
5163       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
5164     AllocArgs.push_back(DeviceID);
5165     NewTask = CGF.EmitRuntimeCall(
5166       createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
5167   } else {
5168     NewTask = CGF.EmitRuntimeCall(
5169       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
5170   }
5171   llvm::Value *NewTaskNewTaskTTy =
5172       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5173           NewTask, KmpTaskTWithPrivatesPtrTy);
5174   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
5175                                                KmpTaskTWithPrivatesQTy);
5176   LValue TDBase =
5177       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
5178   // Fill the data in the resulting kmp_task_t record.
5179   // Copy shareds if there are any.
5180   Address KmpTaskSharedsPtr = Address::invalid();
5181   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
5182     KmpTaskSharedsPtr =
5183         Address(CGF.EmitLoadOfScalar(
5184                     CGF.EmitLValueForField(
5185                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
5186                                            KmpTaskTShareds)),
5187                     Loc),
5188                 CGF.getNaturalTypeAlignment(SharedsTy));
5189     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
5190     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
5191     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
5192   }
5193   // Emit initial values for private copies (if any).
5194   TaskResultTy Result;
5195   if (!Privates.empty()) {
5196     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
5197                      SharedsTy, SharedsPtrTy, Data, Privates,
5198                      /*ForDup=*/false);
5199     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
5200         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
5201       Result.TaskDupFn = emitTaskDupFunction(
5202           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
5203           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
5204           /*WithLastIter=*/!Data.LastprivateVars.empty());
5205     }
5206   }
5207   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
5208   enum { Priority = 0, Destructors = 1 };
5209   // Provide pointer to function with destructors for privates.
5210   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
5211   const RecordDecl *KmpCmplrdataUD =
5212       (*FI)->getType()->getAsUnionType()->getDecl();
5213   if (NeedsCleanup) {
5214     llvm::Value *DestructorFn = emitDestructorsFunction(
5215         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5216         KmpTaskTWithPrivatesQTy);
5217     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
5218     LValue DestructorsLV = CGF.EmitLValueForField(
5219         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
5220     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5221                               DestructorFn, KmpRoutineEntryPtrTy),
5222                           DestructorsLV);
5223   }
5224   // Set priority.
5225   if (Data.Priority.getInt()) {
5226     LValue Data2LV = CGF.EmitLValueForField(
5227         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
5228     LValue PriorityLV = CGF.EmitLValueForField(
5229         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
5230     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
5231   }
5232   Result.NewTask = NewTask;
5233   Result.TaskEntry = TaskEntry;
5234   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
5235   Result.TDBase = TDBase;
5236   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
5237   return Result;
5238 }
5239 
5240 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5241                                    const OMPExecutableDirective &D,
5242                                    llvm::Function *TaskFunction,
5243                                    QualType SharedsTy, Address Shareds,
5244                                    const Expr *IfCond,
5245                                    const OMPTaskDataTy &Data) {
5246   if (!CGF.HaveInsertPoint())
5247     return;
5248 
5249   TaskResultTy Result =
5250       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5251   llvm::Value *NewTask = Result.NewTask;
5252   llvm::Function *TaskEntry = Result.TaskEntry;
5253   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5254   LValue TDBase = Result.TDBase;
5255   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5256   ASTContext &C = CGM.getContext();
5257   // Process list of dependences.
5258   Address DependenciesArray = Address::invalid();
5259   unsigned NumDependencies = Data.Dependences.size();
5260   if (NumDependencies) {
5261     // Dependence kind for RTL.
5262     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
5263     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
5264     RecordDecl *KmpDependInfoRD;
5265     QualType FlagsTy =
5266         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5267     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5268     if (KmpDependInfoTy.isNull()) {
5269       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5270       KmpDependInfoRD->startDefinition();
5271       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5272       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5273       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5274       KmpDependInfoRD->completeDefinition();
5275       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5276     } else {
5277       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5278     }
5279     // Define type kmp_depend_info[<Dependences.size()>];
5280     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5281         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
5282         ArrayType::Normal, /*IndexTypeQuals=*/0);
5283     // kmp_depend_info[<Dependences.size()>] deps;
5284     DependenciesArray =
5285         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5286     for (unsigned I = 0; I < NumDependencies; ++I) {
5287       const Expr *E = Data.Dependences[I].second;
5288       LValue Addr = CGF.EmitLValue(E);
5289       llvm::Value *Size;
5290       QualType Ty = E->getType();
5291       if (const auto *ASE =
5292               dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5293         LValue UpAddrLVal =
5294             CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
5295         llvm::Value *UpAddr =
5296             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
5297         llvm::Value *LowIntPtr =
5298             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
5299         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5300         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5301       } else {
5302         Size = CGF.getTypeSize(Ty);
5303       }
5304       LValue Base = CGF.MakeAddrLValue(
5305           CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
5306           KmpDependInfoTy);
5307       // deps[i].base_addr = &<Dependences[i].second>;
5308       LValue BaseAddrLVal = CGF.EmitLValueForField(
5309           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5310       CGF.EmitStoreOfScalar(
5311           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
5312           BaseAddrLVal);
5313       // deps[i].len = sizeof(<Dependences[i].second>);
5314       LValue LenLVal = CGF.EmitLValueForField(
5315           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5316       CGF.EmitStoreOfScalar(Size, LenLVal);
5317       // deps[i].flags = <Dependences[i].first>;
5318       RTLDependenceKindTy DepKind;
5319       switch (Data.Dependences[I].first) {
5320       case OMPC_DEPEND_in:
5321         DepKind = DepIn;
5322         break;
5323       // Out and InOut dependencies must use the same code.
5324       case OMPC_DEPEND_out:
5325       case OMPC_DEPEND_inout:
5326         DepKind = DepInOut;
5327         break;
5328       case OMPC_DEPEND_mutexinoutset:
5329         DepKind = DepMutexInOutSet;
5330         break;
5331       case OMPC_DEPEND_source:
5332       case OMPC_DEPEND_sink:
5333       case OMPC_DEPEND_unknown:
5334         llvm_unreachable("Unknown task dependence type");
5335       }
5336       LValue FlagsLVal = CGF.EmitLValueForField(
5337           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5338       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5339                             FlagsLVal);
5340     }
5341     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5342         CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
5343   }
5344 
5345   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5346   // libcall.
5347   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5348   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5349   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5350   // list is not empty
5351   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5352   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5353   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5354   llvm::Value *DepTaskArgs[7];
5355   if (NumDependencies) {
5356     DepTaskArgs[0] = UpLoc;
5357     DepTaskArgs[1] = ThreadID;
5358     DepTaskArgs[2] = NewTask;
5359     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
5360     DepTaskArgs[4] = DependenciesArray.getPointer();
5361     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5362     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5363   }
5364   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
5365                         &TaskArgs,
5366                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5367     if (!Data.Tied) {
5368       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5369       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5370       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5371     }
5372     if (NumDependencies) {
5373       CGF.EmitRuntimeCall(
5374           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5375     } else {
5376       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5377                           TaskArgs);
5378     }
5379     // Check if parent region is untied and build return for untied task;
5380     if (auto *Region =
5381             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5382       Region->emitUntiedSwitch(CGF);
5383   };
5384 
5385   llvm::Value *DepWaitTaskArgs[6];
5386   if (NumDependencies) {
5387     DepWaitTaskArgs[0] = UpLoc;
5388     DepWaitTaskArgs[1] = ThreadID;
5389     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
5390     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5391     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5392     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5393   }
5394   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5395                         NumDependencies, &DepWaitTaskArgs,
5396                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5397     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5398     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5399     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5400     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5401     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5402     // is specified.
5403     if (NumDependencies)
5404       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5405                           DepWaitTaskArgs);
5406     // Call proxy_task_entry(gtid, new_task);
5407     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5408                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5409       Action.Enter(CGF);
5410       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5411       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5412                                                           OutlinedFnArgs);
5413     };
5414 
5415     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5416     // kmp_task_t *new_task);
5417     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5418     // kmp_task_t *new_task);
5419     RegionCodeGenTy RCG(CodeGen);
5420     CommonActionTy Action(
5421         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5422         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5423     RCG.setAction(Action);
5424     RCG(CGF);
5425   };
5426 
5427   if (IfCond) {
5428     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5429   } else {
5430     RegionCodeGenTy ThenRCG(ThenCodeGen);
5431     ThenRCG(CGF);
5432   }
5433 }
5434 
5435 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5436                                        const OMPLoopDirective &D,
5437                                        llvm::Function *TaskFunction,
5438                                        QualType SharedsTy, Address Shareds,
5439                                        const Expr *IfCond,
5440                                        const OMPTaskDataTy &Data) {
5441   if (!CGF.HaveInsertPoint())
5442     return;
5443   TaskResultTy Result =
5444       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5445   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5446   // libcall.
5447   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5448   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5449   // sched, kmp_uint64 grainsize, void *task_dup);
5450   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5451   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5452   llvm::Value *IfVal;
5453   if (IfCond) {
5454     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5455                                       /*isSigned=*/true);
5456   } else {
5457     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5458   }
5459 
5460   LValue LBLVal = CGF.EmitLValueForField(
5461       Result.TDBase,
5462       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5463   const auto *LBVar =
5464       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5465   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
5466                        /*IsInitializer=*/true);
5467   LValue UBLVal = CGF.EmitLValueForField(
5468       Result.TDBase,
5469       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5470   const auto *UBVar =
5471       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5472   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
5473                        /*IsInitializer=*/true);
5474   LValue StLVal = CGF.EmitLValueForField(
5475       Result.TDBase,
5476       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5477   const auto *StVar =
5478       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5479   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
5480                        /*IsInitializer=*/true);
5481   // Store reductions address.
5482   LValue RedLVal = CGF.EmitLValueForField(
5483       Result.TDBase,
5484       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5485   if (Data.Reductions) {
5486     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5487   } else {
5488     CGF.EmitNullInitialization(RedLVal.getAddress(),
5489                                CGF.getContext().VoidPtrTy);
5490   }
5491   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5492   llvm::Value *TaskArgs[] = {
5493       UpLoc,
5494       ThreadID,
5495       Result.NewTask,
5496       IfVal,
5497       LBLVal.getPointer(),
5498       UBLVal.getPointer(),
5499       CGF.EmitLoadOfScalar(StLVal, Loc),
5500       llvm::ConstantInt::getSigned(
5501               CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5502       llvm::ConstantInt::getSigned(
5503           CGF.IntTy, Data.Schedule.getPointer()
5504                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
5505                          : NoSchedule),
5506       Data.Schedule.getPointer()
5507           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5508                                       /*isSigned=*/false)
5509           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5510       Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5511                              Result.TaskDupFn, CGF.VoidPtrTy)
5512                        : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5513   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
5514 }
5515 
5516 /// Emit reduction operation for each element of array (required for
5517 /// array sections) LHS op = RHS.
5518 /// \param Type Type of array.
5519 /// \param LHSVar Variable on the left side of the reduction operation
5520 /// (references element of array in original variable).
5521 /// \param RHSVar Variable on the right side of the reduction operation
5522 /// (references element of array in original variable).
5523 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5524 /// RHSVar.
5525 static void EmitOMPAggregateReduction(
5526     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5527     const VarDecl *RHSVar,
5528     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5529                                   const Expr *, const Expr *)> &RedOpGen,
5530     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5531     const Expr *UpExpr = nullptr) {
5532   // Perform element-by-element initialization.
5533   QualType ElementTy;
5534   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5535   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5536 
5537   // Drill down to the base element type on both arrays.
5538   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5539   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5540 
5541   llvm::Value *RHSBegin = RHSAddr.getPointer();
5542   llvm::Value *LHSBegin = LHSAddr.getPointer();
5543   // Cast from pointer to array type to pointer to single element.
5544   llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5545   // The basic structure here is a while-do loop.
5546   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5547   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5548   llvm::Value *IsEmpty =
5549       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5550   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5551 
5552   // Enter the loop body, making that address the current address.
5553   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5554   CGF.EmitBlock(BodyBB);
5555 
5556   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5557 
5558   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5559       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5560   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5561   Address RHSElementCurrent =
5562       Address(RHSElementPHI,
5563               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5564 
5565   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5566       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5567   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5568   Address LHSElementCurrent =
5569       Address(LHSElementPHI,
5570               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5571 
5572   // Emit copy.
5573   CodeGenFunction::OMPPrivateScope Scope(CGF);
5574   Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5575   Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5576   Scope.Privatize();
5577   RedOpGen(CGF, XExpr, EExpr, UpExpr);
5578   Scope.ForceCleanup();
5579 
5580   // Shift the address forward by one element.
5581   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5582       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5583   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5584       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5585   // Check whether we've reached the end.
5586   llvm::Value *Done =
5587       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5588   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5589   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5590   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5591 
5592   // Done.
5593   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5594 }
5595 
5596 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5597 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5598 /// UDR combiner function.
5599 static void emitReductionCombiner(CodeGenFunction &CGF,
5600                                   const Expr *ReductionOp) {
5601   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5602     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5603       if (const auto *DRE =
5604               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5605         if (const auto *DRD =
5606                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5607           std::pair<llvm::Function *, llvm::Function *> Reduction =
5608               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5609           RValue Func = RValue::get(Reduction.first);
5610           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5611           CGF.EmitIgnoredExpr(ReductionOp);
5612           return;
5613         }
5614   CGF.EmitIgnoredExpr(ReductionOp);
5615 }
5616 
5617 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5618     SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5619     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5620     ArrayRef<const Expr *> ReductionOps) {
5621   ASTContext &C = CGM.getContext();
5622 
5623   // void reduction_func(void *LHSArg, void *RHSArg);
5624   FunctionArgList Args;
5625   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5626                            ImplicitParamDecl::Other);
5627   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5628                            ImplicitParamDecl::Other);
5629   Args.push_back(&LHSArg);
5630   Args.push_back(&RHSArg);
5631   const auto &CGFI =
5632       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5633   std::string Name = getName({"omp", "reduction", "reduction_func"});
5634   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5635                                     llvm::GlobalValue::InternalLinkage, Name,
5636                                     &CGM.getModule());
5637   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5638   Fn->setDoesNotRecurse();
5639   CodeGenFunction CGF(CGM);
5640   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5641 
5642   // Dst = (void*[n])(LHSArg);
5643   // Src = (void*[n])(RHSArg);
5644   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5645       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5646       ArgsType), CGF.getPointerAlign());
5647   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5648       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5649       ArgsType), CGF.getPointerAlign());
5650 
5651   //  ...
5652   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5653   //  ...
5654   CodeGenFunction::OMPPrivateScope Scope(CGF);
5655   auto IPriv = Privates.begin();
5656   unsigned Idx = 0;
5657   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5658     const auto *RHSVar =
5659         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5660     Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5661       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5662     });
5663     const auto *LHSVar =
5664         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5665     Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5666       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5667     });
5668     QualType PrivTy = (*IPriv)->getType();
5669     if (PrivTy->isVariablyModifiedType()) {
5670       // Get array size and emit VLA type.
5671       ++Idx;
5672       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5673       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5674       const VariableArrayType *VLA =
5675           CGF.getContext().getAsVariableArrayType(PrivTy);
5676       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5677       CodeGenFunction::OpaqueValueMapping OpaqueMap(
5678           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5679       CGF.EmitVariablyModifiedType(PrivTy);
5680     }
5681   }
5682   Scope.Privatize();
5683   IPriv = Privates.begin();
5684   auto ILHS = LHSExprs.begin();
5685   auto IRHS = RHSExprs.begin();
5686   for (const Expr *E : ReductionOps) {
5687     if ((*IPriv)->getType()->isArrayType()) {
5688       // Emit reduction for array section.
5689       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5690       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5691       EmitOMPAggregateReduction(
5692           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5693           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5694             emitReductionCombiner(CGF, E);
5695           });
5696     } else {
5697       // Emit reduction for array subscript or single variable.
5698       emitReductionCombiner(CGF, E);
5699     }
5700     ++IPriv;
5701     ++ILHS;
5702     ++IRHS;
5703   }
5704   Scope.ForceCleanup();
5705   CGF.FinishFunction();
5706   return Fn;
5707 }
5708 
5709 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5710                                                   const Expr *ReductionOp,
5711                                                   const Expr *PrivateRef,
5712                                                   const DeclRefExpr *LHS,
5713                                                   const DeclRefExpr *RHS) {
5714   if (PrivateRef->getType()->isArrayType()) {
5715     // Emit reduction for array section.
5716     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5717     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5718     EmitOMPAggregateReduction(
5719         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5720         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5721           emitReductionCombiner(CGF, ReductionOp);
5722         });
5723   } else {
5724     // Emit reduction for array subscript or single variable.
5725     emitReductionCombiner(CGF, ReductionOp);
5726   }
5727 }
5728 
5729 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5730                                     ArrayRef<const Expr *> Privates,
5731                                     ArrayRef<const Expr *> LHSExprs,
5732                                     ArrayRef<const Expr *> RHSExprs,
5733                                     ArrayRef<const Expr *> ReductionOps,
5734                                     ReductionOptionsTy Options) {
5735   if (!CGF.HaveInsertPoint())
5736     return;
5737 
5738   bool WithNowait = Options.WithNowait;
5739   bool SimpleReduction = Options.SimpleReduction;
5740 
5741   // Next code should be emitted for reduction:
5742   //
5743   // static kmp_critical_name lock = { 0 };
5744   //
5745   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5746   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5747   //  ...
5748   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5749   //  *(Type<n>-1*)rhs[<n>-1]);
5750   // }
5751   //
5752   // ...
5753   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5754   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5755   // RedList, reduce_func, &<lock>)) {
5756   // case 1:
5757   //  ...
5758   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5759   //  ...
5760   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5761   // break;
5762   // case 2:
5763   //  ...
5764   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5765   //  ...
5766   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5767   // break;
5768   // default:;
5769   // }
5770   //
5771   // if SimpleReduction is true, only the next code is generated:
5772   //  ...
5773   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5774   //  ...
5775 
5776   ASTContext &C = CGM.getContext();
5777 
5778   if (SimpleReduction) {
5779     CodeGenFunction::RunCleanupsScope Scope(CGF);
5780     auto IPriv = Privates.begin();
5781     auto ILHS = LHSExprs.begin();
5782     auto IRHS = RHSExprs.begin();
5783     for (const Expr *E : ReductionOps) {
5784       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5785                                   cast<DeclRefExpr>(*IRHS));
5786       ++IPriv;
5787       ++ILHS;
5788       ++IRHS;
5789     }
5790     return;
5791   }
5792 
5793   // 1. Build a list of reduction variables.
5794   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5795   auto Size = RHSExprs.size();
5796   for (const Expr *E : Privates) {
5797     if (E->getType()->isVariablyModifiedType())
5798       // Reserve place for array size.
5799       ++Size;
5800   }
5801   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5802   QualType ReductionArrayTy =
5803       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
5804                              /*IndexTypeQuals=*/0);
5805   Address ReductionList =
5806       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5807   auto IPriv = Privates.begin();
5808   unsigned Idx = 0;
5809   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5810     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5811     CGF.Builder.CreateStore(
5812         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5813             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
5814         Elem);
5815     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5816       // Store array size.
5817       ++Idx;
5818       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5819       llvm::Value *Size = CGF.Builder.CreateIntCast(
5820           CGF.getVLASize(
5821                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5822               .NumElts,
5823           CGF.SizeTy, /*isSigned=*/false);
5824       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5825                               Elem);
5826     }
5827   }
5828 
5829   // 2. Emit reduce_func().
5830   llvm::Function *ReductionFn = emitReductionFunction(
5831       Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5832       LHSExprs, RHSExprs, ReductionOps);
5833 
5834   // 3. Create static kmp_critical_name lock = { 0 };
5835   std::string Name = getName({"reduction"});
5836   llvm::Value *Lock = getCriticalRegionLock(Name);
5837 
5838   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5839   // RedList, reduce_func, &<lock>);
5840   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5841   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5842   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5843   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5844       ReductionList.getPointer(), CGF.VoidPtrTy);
5845   llvm::Value *Args[] = {
5846       IdentTLoc,                             // ident_t *<loc>
5847       ThreadId,                              // i32 <gtid>
5848       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5849       ReductionArrayTySize,                  // size_type sizeof(RedList)
5850       RL,                                    // void *RedList
5851       ReductionFn, // void (*) (void *, void *) <reduce_func>
5852       Lock         // kmp_critical_name *&<lock>
5853   };
5854   llvm::Value *Res = CGF.EmitRuntimeCall(
5855       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
5856                                        : OMPRTL__kmpc_reduce),
5857       Args);
5858 
5859   // 5. Build switch(res)
5860   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5861   llvm::SwitchInst *SwInst =
5862       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5863 
5864   // 6. Build case 1:
5865   //  ...
5866   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5867   //  ...
5868   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5869   // break;
5870   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5871   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5872   CGF.EmitBlock(Case1BB);
5873 
5874   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5875   llvm::Value *EndArgs[] = {
5876       IdentTLoc, // ident_t *<loc>
5877       ThreadId,  // i32 <gtid>
5878       Lock       // kmp_critical_name *&<lock>
5879   };
5880   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5881                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5882     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5883     auto IPriv = Privates.begin();
5884     auto ILHS = LHSExprs.begin();
5885     auto IRHS = RHSExprs.begin();
5886     for (const Expr *E : ReductionOps) {
5887       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5888                                      cast<DeclRefExpr>(*IRHS));
5889       ++IPriv;
5890       ++ILHS;
5891       ++IRHS;
5892     }
5893   };
5894   RegionCodeGenTy RCG(CodeGen);
5895   CommonActionTy Action(
5896       nullptr, llvm::None,
5897       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
5898                                        : OMPRTL__kmpc_end_reduce),
5899       EndArgs);
5900   RCG.setAction(Action);
5901   RCG(CGF);
5902 
5903   CGF.EmitBranch(DefaultBB);
5904 
5905   // 7. Build case 2:
5906   //  ...
5907   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5908   //  ...
5909   // break;
5910   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5911   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5912   CGF.EmitBlock(Case2BB);
5913 
5914   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5915                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5916     auto ILHS = LHSExprs.begin();
5917     auto IRHS = RHSExprs.begin();
5918     auto IPriv = Privates.begin();
5919     for (const Expr *E : ReductionOps) {
5920       const Expr *XExpr = nullptr;
5921       const Expr *EExpr = nullptr;
5922       const Expr *UpExpr = nullptr;
5923       BinaryOperatorKind BO = BO_Comma;
5924       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5925         if (BO->getOpcode() == BO_Assign) {
5926           XExpr = BO->getLHS();
5927           UpExpr = BO->getRHS();
5928         }
5929       }
5930       // Try to emit update expression as a simple atomic.
5931       const Expr *RHSExpr = UpExpr;
5932       if (RHSExpr) {
5933         // Analyze RHS part of the whole expression.
5934         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5935                 RHSExpr->IgnoreParenImpCasts())) {
5936           // If this is a conditional operator, analyze its condition for
5937           // min/max reduction operator.
5938           RHSExpr = ACO->getCond();
5939         }
5940         if (const auto *BORHS =
5941                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5942           EExpr = BORHS->getRHS();
5943           BO = BORHS->getOpcode();
5944         }
5945       }
5946       if (XExpr) {
5947         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5948         auto &&AtomicRedGen = [BO, VD,
5949                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5950                                     const Expr *EExpr, const Expr *UpExpr) {
5951           LValue X = CGF.EmitLValue(XExpr);
5952           RValue E;
5953           if (EExpr)
5954             E = CGF.EmitAnyExpr(EExpr);
5955           CGF.EmitOMPAtomicSimpleUpdateExpr(
5956               X, E, BO, /*IsXLHSInRHSPart=*/true,
5957               llvm::AtomicOrdering::Monotonic, Loc,
5958               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5959                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5960                 PrivateScope.addPrivate(
5961                     VD, [&CGF, VD, XRValue, Loc]() {
5962                       Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5963                       CGF.emitOMPSimpleStore(
5964                           CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5965                           VD->getType().getNonReferenceType(), Loc);
5966                       return LHSTemp;
5967                     });
5968                 (void)PrivateScope.Privatize();
5969                 return CGF.EmitAnyExpr(UpExpr);
5970               });
5971         };
5972         if ((*IPriv)->getType()->isArrayType()) {
5973           // Emit atomic reduction for array section.
5974           const auto *RHSVar =
5975               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5976           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5977                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5978         } else {
5979           // Emit atomic reduction for array subscript or single variable.
5980           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5981         }
5982       } else {
5983         // Emit as a critical region.
5984         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5985                                            const Expr *, const Expr *) {
5986           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5987           std::string Name = RT.getName({"atomic_reduction"});
5988           RT.emitCriticalRegion(
5989               CGF, Name,
5990               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5991                 Action.Enter(CGF);
5992                 emitReductionCombiner(CGF, E);
5993               },
5994               Loc);
5995         };
5996         if ((*IPriv)->getType()->isArrayType()) {
5997           const auto *LHSVar =
5998               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5999           const auto *RHSVar =
6000               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
6001           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
6002                                     CritRedGen);
6003         } else {
6004           CritRedGen(CGF, nullptr, nullptr, nullptr);
6005         }
6006       }
6007       ++ILHS;
6008       ++IRHS;
6009       ++IPriv;
6010     }
6011   };
6012   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
6013   if (!WithNowait) {
6014     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
6015     llvm::Value *EndArgs[] = {
6016         IdentTLoc, // ident_t *<loc>
6017         ThreadId,  // i32 <gtid>
6018         Lock       // kmp_critical_name *&<lock>
6019     };
6020     CommonActionTy Action(nullptr, llvm::None,
6021                           createRuntimeFunction(OMPRTL__kmpc_end_reduce),
6022                           EndArgs);
6023     AtomicRCG.setAction(Action);
6024     AtomicRCG(CGF);
6025   } else {
6026     AtomicRCG(CGF);
6027   }
6028 
6029   CGF.EmitBranch(DefaultBB);
6030   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
6031 }
6032 
6033 /// Generates unique name for artificial threadprivate variables.
6034 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
6035 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
6036                                       const Expr *Ref) {
6037   SmallString<256> Buffer;
6038   llvm::raw_svector_ostream Out(Buffer);
6039   const clang::DeclRefExpr *DE;
6040   const VarDecl *D = ::getBaseDecl(Ref, DE);
6041   if (!D)
6042     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
6043   D = D->getCanonicalDecl();
6044   std::string Name = CGM.getOpenMPRuntime().getName(
6045       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
6046   Out << Prefix << Name << "_"
6047       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
6048   return Out.str();
6049 }
6050 
6051 /// Emits reduction initializer function:
6052 /// \code
6053 /// void @.red_init(void* %arg) {
6054 /// %0 = bitcast void* %arg to <type>*
6055 /// store <type> <init>, <type>* %0
6056 /// ret void
6057 /// }
6058 /// \endcode
6059 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
6060                                            SourceLocation Loc,
6061                                            ReductionCodeGen &RCG, unsigned N) {
6062   ASTContext &C = CGM.getContext();
6063   FunctionArgList Args;
6064   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6065                           ImplicitParamDecl::Other);
6066   Args.emplace_back(&Param);
6067   const auto &FnInfo =
6068       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6069   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6070   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
6071   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6072                                     Name, &CGM.getModule());
6073   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6074   Fn->setDoesNotRecurse();
6075   CodeGenFunction CGF(CGM);
6076   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6077   Address PrivateAddr = CGF.EmitLoadOfPointer(
6078       CGF.GetAddrOfLocalVar(&Param),
6079       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6080   llvm::Value *Size = nullptr;
6081   // If the size of the reduction item is non-constant, load it from global
6082   // threadprivate variable.
6083   if (RCG.getSizes(N).second) {
6084     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6085         CGF, CGM.getContext().getSizeType(),
6086         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6087     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6088                                 CGM.getContext().getSizeType(), Loc);
6089   }
6090   RCG.emitAggregateType(CGF, N, Size);
6091   LValue SharedLVal;
6092   // If initializer uses initializer from declare reduction construct, emit a
6093   // pointer to the address of the original reduction item (reuired by reduction
6094   // initializer)
6095   if (RCG.usesReductionInitializer(N)) {
6096     Address SharedAddr =
6097         CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6098             CGF, CGM.getContext().VoidPtrTy,
6099             generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6100     SharedAddr = CGF.EmitLoadOfPointer(
6101         SharedAddr,
6102         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
6103     SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
6104   } else {
6105     SharedLVal = CGF.MakeNaturalAlignAddrLValue(
6106         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
6107         CGM.getContext().VoidPtrTy);
6108   }
6109   // Emit the initializer:
6110   // %0 = bitcast void* %arg to <type>*
6111   // store <type> <init>, <type>* %0
6112   RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
6113                          [](CodeGenFunction &) { return false; });
6114   CGF.FinishFunction();
6115   return Fn;
6116 }
6117 
6118 /// Emits reduction combiner function:
6119 /// \code
6120 /// void @.red_comb(void* %arg0, void* %arg1) {
6121 /// %lhs = bitcast void* %arg0 to <type>*
6122 /// %rhs = bitcast void* %arg1 to <type>*
6123 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
6124 /// store <type> %2, <type>* %lhs
6125 /// ret void
6126 /// }
6127 /// \endcode
6128 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
6129                                            SourceLocation Loc,
6130                                            ReductionCodeGen &RCG, unsigned N,
6131                                            const Expr *ReductionOp,
6132                                            const Expr *LHS, const Expr *RHS,
6133                                            const Expr *PrivateRef) {
6134   ASTContext &C = CGM.getContext();
6135   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
6136   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
6137   FunctionArgList Args;
6138   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
6139                                C.VoidPtrTy, ImplicitParamDecl::Other);
6140   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6141                             ImplicitParamDecl::Other);
6142   Args.emplace_back(&ParamInOut);
6143   Args.emplace_back(&ParamIn);
6144   const auto &FnInfo =
6145       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6146   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6147   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
6148   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6149                                     Name, &CGM.getModule());
6150   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6151   Fn->setDoesNotRecurse();
6152   CodeGenFunction CGF(CGM);
6153   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6154   llvm::Value *Size = nullptr;
6155   // If the size of the reduction item is non-constant, load it from global
6156   // threadprivate variable.
6157   if (RCG.getSizes(N).second) {
6158     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6159         CGF, CGM.getContext().getSizeType(),
6160         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6161     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6162                                 CGM.getContext().getSizeType(), Loc);
6163   }
6164   RCG.emitAggregateType(CGF, N, Size);
6165   // Remap lhs and rhs variables to the addresses of the function arguments.
6166   // %lhs = bitcast void* %arg0 to <type>*
6167   // %rhs = bitcast void* %arg1 to <type>*
6168   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6169   PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
6170     // Pull out the pointer to the variable.
6171     Address PtrAddr = CGF.EmitLoadOfPointer(
6172         CGF.GetAddrOfLocalVar(&ParamInOut),
6173         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6174     return CGF.Builder.CreateElementBitCast(
6175         PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
6176   });
6177   PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
6178     // Pull out the pointer to the variable.
6179     Address PtrAddr = CGF.EmitLoadOfPointer(
6180         CGF.GetAddrOfLocalVar(&ParamIn),
6181         C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6182     return CGF.Builder.CreateElementBitCast(
6183         PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
6184   });
6185   PrivateScope.Privatize();
6186   // Emit the combiner body:
6187   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6188   // store <type> %2, <type>* %lhs
6189   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6190       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6191       cast<DeclRefExpr>(RHS));
6192   CGF.FinishFunction();
6193   return Fn;
6194 }
6195 
6196 /// Emits reduction finalizer function:
6197 /// \code
6198 /// void @.red_fini(void* %arg) {
6199 /// %0 = bitcast void* %arg to <type>*
6200 /// <destroy>(<type>* %0)
6201 /// ret void
6202 /// }
6203 /// \endcode
6204 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6205                                            SourceLocation Loc,
6206                                            ReductionCodeGen &RCG, unsigned N) {
6207   if (!RCG.needCleanups(N))
6208     return nullptr;
6209   ASTContext &C = CGM.getContext();
6210   FunctionArgList Args;
6211   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6212                           ImplicitParamDecl::Other);
6213   Args.emplace_back(&Param);
6214   const auto &FnInfo =
6215       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6216   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6217   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6218   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6219                                     Name, &CGM.getModule());
6220   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6221   Fn->setDoesNotRecurse();
6222   CodeGenFunction CGF(CGM);
6223   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6224   Address PrivateAddr = CGF.EmitLoadOfPointer(
6225       CGF.GetAddrOfLocalVar(&Param),
6226       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6227   llvm::Value *Size = nullptr;
6228   // If the size of the reduction item is non-constant, load it from global
6229   // threadprivate variable.
6230   if (RCG.getSizes(N).second) {
6231     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6232         CGF, CGM.getContext().getSizeType(),
6233         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6234     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6235                                 CGM.getContext().getSizeType(), Loc);
6236   }
6237   RCG.emitAggregateType(CGF, N, Size);
6238   // Emit the finalizer body:
6239   // <destroy>(<type>* %0)
6240   RCG.emitCleanups(CGF, N, PrivateAddr);
6241   CGF.FinishFunction();
6242   return Fn;
6243 }
6244 
6245 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6246     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6247     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6248   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6249     return nullptr;
6250 
6251   // Build typedef struct:
6252   // kmp_task_red_input {
6253   //   void *reduce_shar; // shared reduction item
6254   //   size_t reduce_size; // size of data item
6255   //   void *reduce_init; // data initialization routine
6256   //   void *reduce_fini; // data finalization routine
6257   //   void *reduce_comb; // data combiner routine
6258   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
6259   // } kmp_task_red_input_t;
6260   ASTContext &C = CGM.getContext();
6261   RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
6262   RD->startDefinition();
6263   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6264   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6265   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6266   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6267   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6268   const FieldDecl *FlagsFD = addFieldToRecordDecl(
6269       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6270   RD->completeDefinition();
6271   QualType RDType = C.getRecordType(RD);
6272   unsigned Size = Data.ReductionVars.size();
6273   llvm::APInt ArraySize(/*numBits=*/64, Size);
6274   QualType ArrayRDType = C.getConstantArrayType(
6275       RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
6276   // kmp_task_red_input_t .rd_input.[Size];
6277   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6278   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
6279                        Data.ReductionOps);
6280   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6281     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6282     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6283                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6284     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6285         TaskRedInput.getPointer(), Idxs,
6286         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6287         ".rd_input.gep.");
6288     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6289     // ElemLVal.reduce_shar = &Shareds[Cnt];
6290     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6291     RCG.emitSharedLValue(CGF, Cnt);
6292     llvm::Value *CastedShared =
6293         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
6294     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6295     RCG.emitAggregateType(CGF, Cnt);
6296     llvm::Value *SizeValInChars;
6297     llvm::Value *SizeVal;
6298     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6299     // We use delayed creation/initialization for VLAs, array sections and
6300     // custom reduction initializations. It is required because runtime does not
6301     // provide the way to pass the sizes of VLAs/array sections to
6302     // initializer/combiner/finalizer functions and does not pass the pointer to
6303     // original reduction item to the initializer. Instead threadprivate global
6304     // variables are used to store these values and use them in the functions.
6305     bool DelayedCreation = !!SizeVal;
6306     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6307                                                /*isSigned=*/false);
6308     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6309     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6310     // ElemLVal.reduce_init = init;
6311     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6312     llvm::Value *InitAddr =
6313         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6314     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6315     DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
6316     // ElemLVal.reduce_fini = fini;
6317     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6318     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6319     llvm::Value *FiniAddr = Fini
6320                                 ? CGF.EmitCastToVoidPtr(Fini)
6321                                 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6322     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6323     // ElemLVal.reduce_comb = comb;
6324     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6325     llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6326         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6327         RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6328     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6329     // ElemLVal.flags = 0;
6330     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6331     if (DelayedCreation) {
6332       CGF.EmitStoreOfScalar(
6333           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6334           FlagsLVal);
6335     } else
6336       CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
6337   }
6338   // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
6339   // *data);
6340   llvm::Value *Args[] = {
6341       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6342                                 /*isSigned=*/true),
6343       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6344       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6345                                                       CGM.VoidPtrTy)};
6346   return CGF.EmitRuntimeCall(
6347       createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
6348 }
6349 
6350 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6351                                               SourceLocation Loc,
6352                                               ReductionCodeGen &RCG,
6353                                               unsigned N) {
6354   auto Sizes = RCG.getSizes(N);
6355   // Emit threadprivate global variable if the type is non-constant
6356   // (Sizes.second = nullptr).
6357   if (Sizes.second) {
6358     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6359                                                      /*isSigned=*/false);
6360     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6361         CGF, CGM.getContext().getSizeType(),
6362         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6363     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6364   }
6365   // Store address of the original reduction item if custom initializer is used.
6366   if (RCG.usesReductionInitializer(N)) {
6367     Address SharedAddr = getAddrOfArtificialThreadPrivate(
6368         CGF, CGM.getContext().VoidPtrTy,
6369         generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6370     CGF.Builder.CreateStore(
6371         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6372             RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
6373         SharedAddr, /*IsVolatile=*/false);
6374   }
6375 }
6376 
6377 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6378                                               SourceLocation Loc,
6379                                               llvm::Value *ReductionsPtr,
6380                                               LValue SharedLVal) {
6381   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6382   // *d);
6383   llvm::Value *Args[] = {
6384       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6385                                 /*isSigned=*/true),
6386       ReductionsPtr,
6387       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
6388                                                       CGM.VoidPtrTy)};
6389   return Address(
6390       CGF.EmitRuntimeCall(
6391           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6392       SharedLVal.getAlignment());
6393 }
6394 
6395 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6396                                        SourceLocation Loc) {
6397   if (!CGF.HaveInsertPoint())
6398     return;
6399   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6400   // global_tid);
6401   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6402   // Ignore return result until untied tasks are supported.
6403   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6404   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6405     Region->emitUntiedSwitch(CGF);
6406 }
6407 
6408 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6409                                            OpenMPDirectiveKind InnerKind,
6410                                            const RegionCodeGenTy &CodeGen,
6411                                            bool HasCancel) {
6412   if (!CGF.HaveInsertPoint())
6413     return;
6414   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6415   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6416 }
6417 
namespace {
/// Cancellation kinds produced by getCancellationKind(). The numeric value is
/// passed as the 'cncl_kind' argument of __kmpc_cancel and
/// __kmpc_cancellationpoint, so the explicit values must not be changed
/// casually.
enum RTCancelKind {
  // Placeholder used as the initial value in getCancellationKind(); it is
  // always overwritten there.
  CancelNoreq = 0,
  // Selected for OMPD_parallel.
  CancelParallel = 1,
  // Selected for OMPD_for.
  CancelLoop = 2,
  // Selected for OMPD_sections.
  CancelSections = 3,
  // Selected for OMPD_taskgroup (the fallback case).
  CancelTaskgroup = 4
};
} // anonymous namespace
6427 
6428 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6429   RTCancelKind CancelKind = CancelNoreq;
6430   if (CancelRegion == OMPD_parallel)
6431     CancelKind = CancelParallel;
6432   else if (CancelRegion == OMPD_for)
6433     CancelKind = CancelLoop;
6434   else if (CancelRegion == OMPD_sections)
6435     CancelKind = CancelSections;
6436   else {
6437     assert(CancelRegion == OMPD_taskgroup);
6438     CancelKind = CancelTaskgroup;
6439   }
6440   return CancelKind;
6441 }
6442 
6443 void CGOpenMPRuntime::emitCancellationPointCall(
6444     CodeGenFunction &CGF, SourceLocation Loc,
6445     OpenMPDirectiveKind CancelRegion) {
6446   if (!CGF.HaveInsertPoint())
6447     return;
6448   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6449   // global_tid, kmp_int32 cncl_kind);
6450   if (auto *OMPRegionInfo =
6451           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6452     // For 'cancellation point taskgroup', the task region info may not have a
6453     // cancel. This may instead happen in another adjacent task.
6454     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6455       llvm::Value *Args[] = {
6456           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6457           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6458       // Ignore return result until untied tasks are supported.
6459       llvm::Value *Result = CGF.EmitRuntimeCall(
6460           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
6461       // if (__kmpc_cancellationpoint()) {
6462       //   exit from construct;
6463       // }
6464       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6465       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6466       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6467       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6468       CGF.EmitBlock(ExitBB);
6469       // exit from construct;
6470       CodeGenFunction::JumpDest CancelDest =
6471           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6472       CGF.EmitBranchThroughCleanup(CancelDest);
6473       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6474     }
6475   }
6476 }
6477 
6478 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6479                                      const Expr *IfCond,
6480                                      OpenMPDirectiveKind CancelRegion) {
6481   if (!CGF.HaveInsertPoint())
6482     return;
6483   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6484   // kmp_int32 cncl_kind);
6485   if (auto *OMPRegionInfo =
6486           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6487     auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
6488                                                         PrePostActionTy &) {
6489       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6490       llvm::Value *Args[] = {
6491           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6492           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6493       // Ignore return result until untied tasks are supported.
6494       llvm::Value *Result = CGF.EmitRuntimeCall(
6495           RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
6496       // if (__kmpc_cancel()) {
6497       //   exit from construct;
6498       // }
6499       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6500       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6501       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6502       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6503       CGF.EmitBlock(ExitBB);
6504       // exit from construct;
6505       CodeGenFunction::JumpDest CancelDest =
6506           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6507       CGF.EmitBranchThroughCleanup(CancelDest);
6508       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6509     };
6510     if (IfCond) {
6511       emitOMPIfClause(CGF, IfCond, ThenGen,
6512                       [](CodeGenFunction &, PrePostActionTy &) {});
6513     } else {
6514       RegionCodeGenTy ThenRCG(ThenGen);
6515       ThenRCG(CGF);
6516     }
6517   }
6518 }
6519 
6520 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6521     const OMPExecutableDirective &D, StringRef ParentName,
6522     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6523     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6524   assert(!ParentName.empty() && "Invalid target region parent name!");
6525   HasEmittedTargetRegion = true;
6526   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6527                                    IsOffloadEntry, CodeGen);
6528 }
6529 
6530 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6531     const OMPExecutableDirective &D, StringRef ParentName,
6532     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6533     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6534   // Create a unique name for the entry function using the source location
6535   // information of the current target region. The name will be something like:
6536   //
6537   // __omp_offloading_DD_FFFF_PP_lBB
6538   //
6539   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6540   // mangled name of the function that encloses the target region and BB is the
6541   // line number of the target region.
6542 
6543   unsigned DeviceID;
6544   unsigned FileID;
6545   unsigned Line;
6546   getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6547                            Line);
6548   SmallString<64> EntryFnName;
6549   {
6550     llvm::raw_svector_ostream OS(EntryFnName);
6551     OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6552        << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6553   }
6554 
6555   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6556 
6557   CodeGenFunction CGF(CGM, true);
6558   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6559   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6560 
6561   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
6562 
6563   // If this target outline function is not an offload entry, we don't need to
6564   // register it.
6565   if (!IsOffloadEntry)
6566     return;
6567 
6568   // The target region ID is used by the runtime library to identify the current
6569   // target region, so it only has to be unique and not necessarily point to
6570   // anything. It could be the pointer to the outlined function that implements
6571   // the target region, but we aren't using that so that the compiler doesn't
6572   // need to keep that, and could therefore inline the host function if proven
6573   // worthwhile during optimization. In the other hand, if emitting code for the
6574   // device, the ID has to be the function address so that it can retrieved from
6575   // the offloading entry and launched by the runtime library. We also mark the
6576   // outlined function to have external linkage in case we are emitting code for
6577   // the device, because these functions will be entry points to the device.
6578 
6579   if (CGM.getLangOpts().OpenMPIsDevice) {
6580     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6581     OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6582     OutlinedFn->setDSOLocal(false);
6583   } else {
6584     std::string Name = getName({EntryFnName, "region_id"});
6585     OutlinedFnID = new llvm::GlobalVariable(
6586         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6587         llvm::GlobalValue::WeakAnyLinkage,
6588         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6589   }
6590 
6591   // Register the information for the entry associated with this target region.
6592   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6593       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6594       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6595 }
6596 
6597 /// Checks if the expression is constant or does not have non-trivial function
6598 /// calls.
6599 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6600   // We can skip constant expressions.
6601   // We can skip expressions with trivial calls or simple expressions.
6602   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6603           !E->hasNonTrivialCall(Ctx)) &&
6604          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6605 }
6606 
6607 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6608                                                     const Stmt *Body) {
6609   const Stmt *Child = Body->IgnoreContainers();
6610   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6611     Child = nullptr;
6612     for (const Stmt *S : C->body()) {
6613       if (const auto *E = dyn_cast<Expr>(S)) {
6614         if (isTrivial(Ctx, E))
6615           continue;
6616       }
6617       // Some of the statements can be ignored.
6618       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6619           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6620         continue;
6621       // Analyze declarations.
6622       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6623         if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6624               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6625                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6626                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6627                   isa<UsingDirectiveDecl>(D) ||
6628                   isa<OMPDeclareReductionDecl>(D) ||
6629                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6630                 return true;
6631               const auto *VD = dyn_cast<VarDecl>(D);
6632               if (!VD)
6633                 return false;
6634               return VD->isConstexpr() ||
6635                      ((VD->getType().isTrivialType(Ctx) ||
6636                        VD->getType()->isReferenceType()) &&
6637                       (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6638             }))
6639           continue;
6640       }
6641       // Found multiple children - cannot get the one child only.
6642       if (Child)
6643         return nullptr;
6644       Child = S;
6645     }
6646     if (Child)
6647       Child = Child->IgnoreContainers();
6648   }
6649   return Child;
6650 }
6651 
6652 /// Emit the number of teams for a target directive.  Inspect the num_teams
6653 /// clause associated with a teams construct combined or closely nested
6654 /// with the target directive.
6655 ///
6656 /// Emit a team of size one for directives such as 'target parallel' that
6657 /// have no associated teams construct.
6658 ///
6659 /// Otherwise, return nullptr.
6660 static llvm::Value *
6661 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6662                                const OMPExecutableDirective &D) {
6663   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6664          "Clauses associated with the teams directive expected to be emitted "
6665          "only for the host!");
6666   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6667   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6668          "Expected target-based executable directive.");
6669   CGBuilderTy &Bld = CGF.Builder;
6670   switch (DirectiveKind) {
6671   case OMPD_target: {
6672     const auto *CS = D.getInnermostCapturedStmt();
6673     const auto *Body =
6674         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6675     const Stmt *ChildStmt =
6676         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6677     if (const auto *NestedDir =
6678             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6679       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6680         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6681           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6682           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6683           const Expr *NumTeams =
6684               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6685           llvm::Value *NumTeamsVal =
6686               CGF.EmitScalarExpr(NumTeams,
6687                                  /*IgnoreResultAssign*/ true);
6688           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6689                                    /*isSigned=*/true);
6690         }
6691         return Bld.getInt32(0);
6692       }
6693       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6694           isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6695         return Bld.getInt32(1);
6696       return Bld.getInt32(0);
6697     }
6698     return nullptr;
6699   }
6700   case OMPD_target_teams:
6701   case OMPD_target_teams_distribute:
6702   case OMPD_target_teams_distribute_simd:
6703   case OMPD_target_teams_distribute_parallel_for:
6704   case OMPD_target_teams_distribute_parallel_for_simd: {
6705     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6706       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6707       const Expr *NumTeams =
6708           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6709       llvm::Value *NumTeamsVal =
6710           CGF.EmitScalarExpr(NumTeams,
6711                              /*IgnoreResultAssign*/ true);
6712       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6713                                /*isSigned=*/true);
6714     }
6715     return Bld.getInt32(0);
6716   }
6717   case OMPD_target_parallel:
6718   case OMPD_target_parallel_for:
6719   case OMPD_target_parallel_for_simd:
6720   case OMPD_target_simd:
6721     return Bld.getInt32(1);
6722   case OMPD_parallel:
6723   case OMPD_for:
6724   case OMPD_parallel_for:
6725   case OMPD_parallel_sections:
6726   case OMPD_for_simd:
6727   case OMPD_parallel_for_simd:
6728   case OMPD_cancel:
6729   case OMPD_cancellation_point:
6730   case OMPD_ordered:
6731   case OMPD_threadprivate:
6732   case OMPD_allocate:
6733   case OMPD_task:
6734   case OMPD_simd:
6735   case OMPD_sections:
6736   case OMPD_section:
6737   case OMPD_single:
6738   case OMPD_master:
6739   case OMPD_critical:
6740   case OMPD_taskyield:
6741   case OMPD_barrier:
6742   case OMPD_taskwait:
6743   case OMPD_taskgroup:
6744   case OMPD_atomic:
6745   case OMPD_flush:
6746   case OMPD_teams:
6747   case OMPD_target_data:
6748   case OMPD_target_exit_data:
6749   case OMPD_target_enter_data:
6750   case OMPD_distribute:
6751   case OMPD_distribute_simd:
6752   case OMPD_distribute_parallel_for:
6753   case OMPD_distribute_parallel_for_simd:
6754   case OMPD_teams_distribute:
6755   case OMPD_teams_distribute_simd:
6756   case OMPD_teams_distribute_parallel_for:
6757   case OMPD_teams_distribute_parallel_for_simd:
6758   case OMPD_target_update:
6759   case OMPD_declare_simd:
6760   case OMPD_declare_target:
6761   case OMPD_end_declare_target:
6762   case OMPD_declare_reduction:
6763   case OMPD_declare_mapper:
6764   case OMPD_taskloop:
6765   case OMPD_taskloop_simd:
6766   case OMPD_requires:
6767   case OMPD_unknown:
6768     break;
6769   }
6770   llvm_unreachable("Unexpected directive kind.");
6771 }
6772 
6773 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6774                                   llvm::Value *DefaultThreadLimitVal) {
6775   const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6776       CGF.getContext(), CS->getCapturedStmt());
6777   if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6778     if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6779       llvm::Value *NumThreads = nullptr;
6780       llvm::Value *CondVal = nullptr;
6781       // Handle if clause. If if clause present, the number of threads is
6782       // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6783       if (Dir->hasClausesOfKind<OMPIfClause>()) {
6784         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6785         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6786         const OMPIfClause *IfClause = nullptr;
6787         for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6788           if (C->getNameModifier() == OMPD_unknown ||
6789               C->getNameModifier() == OMPD_parallel) {
6790             IfClause = C;
6791             break;
6792           }
6793         }
6794         if (IfClause) {
6795           const Expr *Cond = IfClause->getCondition();
6796           bool Result;
6797           if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6798             if (!Result)
6799               return CGF.Builder.getInt32(1);
6800           } else {
6801             CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6802             if (const auto *PreInit =
6803                     cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6804               for (const auto *I : PreInit->decls()) {
6805                 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6806                   CGF.EmitVarDecl(cast<VarDecl>(*I));
6807                 } else {
6808                   CodeGenFunction::AutoVarEmission Emission =
6809                       CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6810                   CGF.EmitAutoVarCleanups(Emission);
6811                 }
6812               }
6813             }
6814             CondVal = CGF.EvaluateExprAsBool(Cond);
6815           }
6816         }
6817       }
6818       // Check the value of num_threads clause iff if clause was not specified
6819       // or is not evaluated to false.
6820       if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6821         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6822         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6823         const auto *NumThreadsClause =
6824             Dir->getSingleClause<OMPNumThreadsClause>();
6825         CodeGenFunction::LexicalScope Scope(
6826             CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6827         if (const auto *PreInit =
6828                 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6829           for (const auto *I : PreInit->decls()) {
6830             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6831               CGF.EmitVarDecl(cast<VarDecl>(*I));
6832             } else {
6833               CodeGenFunction::AutoVarEmission Emission =
6834                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6835               CGF.EmitAutoVarCleanups(Emission);
6836             }
6837           }
6838         }
6839         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6840         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6841                                                /*isSigned=*/false);
6842         if (DefaultThreadLimitVal)
6843           NumThreads = CGF.Builder.CreateSelect(
6844               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6845               DefaultThreadLimitVal, NumThreads);
6846       } else {
6847         NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6848                                            : CGF.Builder.getInt32(0);
6849       }
6850       // Process condition of the if clause.
6851       if (CondVal) {
6852         NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6853                                               CGF.Builder.getInt32(1));
6854       }
6855       return NumThreads;
6856     }
6857     if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6858       return CGF.Builder.getInt32(1);
6859     return DefaultThreadLimitVal;
6860   }
6861   return DefaultThreadLimitVal ? DefaultThreadLimitVal
6862                                : CGF.Builder.getInt32(0);
6863 }
6864 
6865 /// Emit the number of threads for a target directive.  Inspect the
6866 /// thread_limit clause associated with a teams construct combined or closely
6867 /// nested with the target directive.
6868 ///
6869 /// Emit the num_threads clause for directives such as 'target parallel' that
6870 /// have no associated teams construct.
6871 ///
6872 /// Otherwise, return nullptr.
6873 static llvm::Value *
6874 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
6875                                  const OMPExecutableDirective &D) {
6876   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6877          "Clauses associated with the teams directive expected to be emitted "
6878          "only for the host!");
6879   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6880   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6881          "Expected target-based executable directive.");
6882   CGBuilderTy &Bld = CGF.Builder;
6883   llvm::Value *ThreadLimitVal = nullptr;
6884   llvm::Value *NumThreadsVal = nullptr;
6885   switch (DirectiveKind) {
6886   case OMPD_target: {
6887     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6888     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6889       return NumThreads;
6890     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6891         CGF.getContext(), CS->getCapturedStmt());
6892     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6893       if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6894         CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6895         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6896         const auto *ThreadLimitClause =
6897             Dir->getSingleClause<OMPThreadLimitClause>();
6898         CodeGenFunction::LexicalScope Scope(
6899             CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6900         if (const auto *PreInit =
6901                 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6902           for (const auto *I : PreInit->decls()) {
6903             if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6904               CGF.EmitVarDecl(cast<VarDecl>(*I));
6905             } else {
6906               CodeGenFunction::AutoVarEmission Emission =
6907                   CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6908               CGF.EmitAutoVarCleanups(Emission);
6909             }
6910           }
6911         }
6912         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6913             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6914         ThreadLimitVal =
6915             Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6916       }
6917       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6918           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6919         CS = Dir->getInnermostCapturedStmt();
6920         const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6921             CGF.getContext(), CS->getCapturedStmt());
6922         Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6923       }
6924       if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6925           !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6926         CS = Dir->getInnermostCapturedStmt();
6927         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6928           return NumThreads;
6929       }
6930       if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6931         return Bld.getInt32(1);
6932     }
6933     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6934   }
6935   case OMPD_target_teams: {
6936     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6937       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6938       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6939       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6940           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6941       ThreadLimitVal =
6942           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6943     }
6944     const CapturedStmt *CS = D.getInnermostCapturedStmt();
6945     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6946       return NumThreads;
6947     const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6948         CGF.getContext(), CS->getCapturedStmt());
6949     if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6950       if (Dir->getDirectiveKind() == OMPD_distribute) {
6951         CS = Dir->getInnermostCapturedStmt();
6952         if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6953           return NumThreads;
6954       }
6955     }
6956     return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6957   }
6958   case OMPD_target_teams_distribute:
6959     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6960       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6961       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6962       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6963           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6964       ThreadLimitVal =
6965           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6966     }
6967     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6968   case OMPD_target_parallel:
6969   case OMPD_target_parallel_for:
6970   case OMPD_target_parallel_for_simd:
6971   case OMPD_target_teams_distribute_parallel_for:
6972   case OMPD_target_teams_distribute_parallel_for_simd: {
6973     llvm::Value *CondVal = nullptr;
6974     // Handle if clause. If if clause present, the number of threads is
6975     // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6976     if (D.hasClausesOfKind<OMPIfClause>()) {
6977       const OMPIfClause *IfClause = nullptr;
6978       for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6979         if (C->getNameModifier() == OMPD_unknown ||
6980             C->getNameModifier() == OMPD_parallel) {
6981           IfClause = C;
6982           break;
6983         }
6984       }
6985       if (IfClause) {
6986         const Expr *Cond = IfClause->getCondition();
6987         bool Result;
6988         if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6989           if (!Result)
6990             return Bld.getInt32(1);
6991         } else {
6992           CodeGenFunction::RunCleanupsScope Scope(CGF);
6993           CondVal = CGF.EvaluateExprAsBool(Cond);
6994         }
6995       }
6996     }
6997     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6998       CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6999       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7000       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7001           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7002       ThreadLimitVal =
7003           Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7004     }
7005     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
7006       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
7007       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
7008       llvm::Value *NumThreads = CGF.EmitScalarExpr(
7009           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
7010       NumThreadsVal =
7011           Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
7012       ThreadLimitVal = ThreadLimitVal
7013                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
7014                                                                 ThreadLimitVal),
7015                                               NumThreadsVal, ThreadLimitVal)
7016                            : NumThreadsVal;
7017     }
7018     if (!ThreadLimitVal)
7019       ThreadLimitVal = Bld.getInt32(0);
7020     if (CondVal)
7021       return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
7022     return ThreadLimitVal;
7023   }
7024   case OMPD_target_teams_distribute_simd:
7025   case OMPD_target_simd:
7026     return Bld.getInt32(1);
7027   case OMPD_parallel:
7028   case OMPD_for:
7029   case OMPD_parallel_for:
7030   case OMPD_parallel_sections:
7031   case OMPD_for_simd:
7032   case OMPD_parallel_for_simd:
7033   case OMPD_cancel:
7034   case OMPD_cancellation_point:
7035   case OMPD_ordered:
7036   case OMPD_threadprivate:
7037   case OMPD_allocate:
7038   case OMPD_task:
7039   case OMPD_simd:
7040   case OMPD_sections:
7041   case OMPD_section:
7042   case OMPD_single:
7043   case OMPD_master:
7044   case OMPD_critical:
7045   case OMPD_taskyield:
7046   case OMPD_barrier:
7047   case OMPD_taskwait:
7048   case OMPD_taskgroup:
7049   case OMPD_atomic:
7050   case OMPD_flush:
7051   case OMPD_teams:
7052   case OMPD_target_data:
7053   case OMPD_target_exit_data:
7054   case OMPD_target_enter_data:
7055   case OMPD_distribute:
7056   case OMPD_distribute_simd:
7057   case OMPD_distribute_parallel_for:
7058   case OMPD_distribute_parallel_for_simd:
7059   case OMPD_teams_distribute:
7060   case OMPD_teams_distribute_simd:
7061   case OMPD_teams_distribute_parallel_for:
7062   case OMPD_teams_distribute_parallel_for_simd:
7063   case OMPD_target_update:
7064   case OMPD_declare_simd:
7065   case OMPD_declare_target:
7066   case OMPD_end_declare_target:
7067   case OMPD_declare_reduction:
7068   case OMPD_declare_mapper:
7069   case OMPD_taskloop:
7070   case OMPD_taskloop_simd:
7071   case OMPD_requires:
7072   case OMPD_unknown:
7073     break;
7074   }
7075   llvm_unreachable("Unsupported directive kind.");
7076 }
7077 
7078 namespace {
7079 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7080 
7081 // Utility to handle information from clauses associated with a given
7082 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7083 // It provides a convenient interface to obtain the information and generate
7084 // code for that information.
7085 class MappableExprsHandler {
7086 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. The enum is a bitmask enum, so individual flags can be
  /// combined with the bitwise operators.
  /// NOTE(review): presumably these values have to stay in sync with the
  /// offloading runtime library - confirm before changing any of them.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// The map was not written by the user but added implicitly.
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class. This is a multi-bit field rather than a single flag; its
    /// bit offset is computed by getFlagMemberOffset().
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7127 
7128   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7129   static unsigned getFlagMemberOffset() {
7130     unsigned Offset = 0;
7131     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7132          Remain = Remain >> 1)
7133       Offset++;
7134     return Offset;
7135   }
7136 
7137   /// Class that associates information with a base pointer to be passed to the
7138   /// runtime library.
7139   class BasePointerInfo {
7140     /// The base pointer.
7141     llvm::Value *Ptr = nullptr;
7142     /// The base declaration that refers to this device pointer, or null if
7143     /// there is none.
7144     const ValueDecl *DevPtrDecl = nullptr;
7145 
7146   public:
7147     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7148         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7149     llvm::Value *operator*() const { return Ptr; }
7150     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7151     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7152   };
7153 
  /// List of base pointers, each optionally tagged with the declaration of
  /// the device pointer it refers to.
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// List of plain LLVM values (used for the pointers and the sizes arrays).
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// List of map-type flag words.
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7157 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Lowest mapped element as a (field index, address) pair. Starts out as
    /// field index 0 with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element as a (field index, address) pair. Starts out as
    /// field index 0 with an invalid address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Starts out invalid.
    /// NOTE(review): presumably the address of the struct itself - confirm
    /// against the code that fills this in.
    Address Base = Address::invalid();
  };
7169 
7170 private:
  /// Information recorded for one list of mappable-expression components:
  /// the components themselves, the map type and map modifiers that apply to
  /// them, whether the device pointer has to be returned, and whether the map
  /// is implicit.
  struct MapInfo {
    /// The expression components. This is a reference to a list owned
    /// elsewhere, not a copy.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type; OMPC_MAP_unknown until set through the constructor.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map modifiers. ArrayRef does not own its elements, so the underlying
    /// storage has to outlive this object.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Whether the device pointer has to be returned for this entry.
    bool ReturnDevicePointer = false;
    /// Whether the map is implicit.
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7188 
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed. This struct records one such
  /// deferred entry.
  struct DeferredDevicePtrEntryTy {
    /// NOTE(review): presumably the expression that names the pointer in the
    /// use_device_ptr clause - confirm against the code that creates entries.
    const Expr *IE = nullptr;
    /// NOTE(review): presumably the declaration of that pointer - confirm.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7199 
  /// The target directive from which the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
7202   llvm::PointerUnion<const OMPExecutableDirective *,
7203                      const OMPDeclareMapperDecl *>
7204       CurDir;
7205 
7206   /// Function the directive is being generated for.
7207   CodeGenFunction &CGF;
7208 
7209   /// Set of all first private variables in the current directive.
7210   /// bool data is set to true if the variable is implicitly marked as
7211   /// firstprivate, false otherwise.
7212   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7213 
7214   /// Map between device pointer declarations and their expression components.
7215   /// The key value for declarations in 'this' is null.
7216   llvm::DenseMap<
7217       const ValueDecl *,
7218       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7219       DevPointersMap;
7220 
7221   llvm::Value *getExprTypeSize(const Expr *E) const {
7222     QualType ExprTy = E->getType().getCanonicalType();
7223 
7224     // Reference types are ignored for mapping purposes.
7225     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7226       ExprTy = RefTy->getPointeeType().getCanonicalType();
7227 
7228     // Given that an array section is considered a built-in type, we need to
7229     // do the calculation based on the length of the section instead of relying
7230     // on CGF.getTypeSize(E->getType()).
7231     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7232       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7233                             OAE->getBase()->IgnoreParenImpCasts())
7234                             .getCanonicalType();
7235 
7236       // If there is no length associated with the expression, that means we
7237       // are using the whole length of the base.
7238       if (!OAE->getLength() && OAE->getColonLoc().isValid())
7239         return CGF.getTypeSize(BaseTy);
7240 
7241       llvm::Value *ElemSize;
7242       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7243         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7244       } else {
7245         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7246         assert(ATy && "Expecting array type if not a pointer type.");
7247         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7248       }
7249 
7250       // If we don't have a length at this point, that is because we have an
7251       // array section with a single element.
7252       if (!OAE->getLength())
7253         return ElemSize;
7254 
7255       llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
7256       LengthVal =
7257           CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
7258       return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7259     }
7260     return CGF.getTypeSize(ExprTy);
7261   }
7262 
7263   /// Return the corresponding bits for a given map clause modifier. Add
7264   /// a flag marking the map as a pointer if requested. Add a flag marking the
7265   /// map as the first one of a series of maps that relate to the same map
7266   /// expression.
7267   OpenMPOffloadMappingFlags getMapTypeBits(
7268       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7269       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7270     OpenMPOffloadMappingFlags Bits =
7271         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7272     switch (MapType) {
7273     case OMPC_MAP_alloc:
7274     case OMPC_MAP_release:
7275       // alloc and release is the default behavior in the runtime library,  i.e.
7276       // if we don't pass any bits alloc/release that is what the runtime is
7277       // going to do. Therefore, we don't need to signal anything for these two
7278       // type modifiers.
7279       break;
7280     case OMPC_MAP_to:
7281       Bits |= OMP_MAP_TO;
7282       break;
7283     case OMPC_MAP_from:
7284       Bits |= OMP_MAP_FROM;
7285       break;
7286     case OMPC_MAP_tofrom:
7287       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7288       break;
7289     case OMPC_MAP_delete:
7290       Bits |= OMP_MAP_DELETE;
7291       break;
7292     case OMPC_MAP_unknown:
7293       llvm_unreachable("Unexpected map type!");
7294     }
7295     if (AddPtrFlag)
7296       Bits |= OMP_MAP_PTR_AND_OBJ;
7297     if (AddIsTargetParamFlag)
7298       Bits |= OMP_MAP_TARGET_PARAM;
7299     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7300         != MapModifiers.end())
7301       Bits |= OMP_MAP_ALWAYS;
7302     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7303         != MapModifiers.end())
7304       Bits |= OMP_MAP_CLOSE;
7305     return Bits;
7306   }
7307 
7308   /// Return true if the provided expression is a final array section. A
7309   /// final array section, is one whose length can't be proved to be one.
7310   bool isFinalArraySectionExpression(const Expr *E) const {
7311     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7312 
7313     // It is not an array section and therefore not a unity-size one.
7314     if (!OASE)
7315       return false;
7316 
7317     // An array section with no colon always refer to a single element.
7318     if (OASE->getColonLoc().isInvalid())
7319       return false;
7320 
7321     const Expr *Length = OASE->getLength();
7322 
7323     // If we don't have a length we have to check if the array has size 1
7324     // for this dimension. Also, we should always expect a length if the
7325     // base type is pointer.
7326     if (!Length) {
7327       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7328                              OASE->getBase()->IgnoreParenImpCasts())
7329                              .getCanonicalType();
7330       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7331         return ATy->getSize().getSExtValue() != 1;
7332       // If we don't have a constant dimension length, we have to consider
7333       // the current section as having any size, so it is not necessarily
7334       // unitary. If it happen to be unity size, that's user fault.
7335       return true;
7336     }
7337 
7338     // Check if the length evaluates to 1.
7339     Expr::EvalResult Result;
7340     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7341       return true; // Can have more that size 1.
7342 
7343     llvm::APSInt ConstLength = Result.Val.getInt();
7344     return ConstLength.getSExtValue() != 1;
7345   }
7346 
7347   /// Generate the base pointers, section pointers, sizes and map type
7348   /// bits for the provided map type, map modifier, and expression components.
7349   /// \a IsFirstComponent should be set to true if the provided set of
7350   /// components is the first associated with a capture.
7351   void generateInfoForComponentList(
7352       OpenMPMapClauseKind MapType,
7353       ArrayRef<OpenMPMapModifierKind> MapModifiers,
7354       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7355       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7356       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7357       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7358       bool IsImplicit,
7359       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7360           OverlappedElements = llvm::None) const {
7361     // The following summarizes what has to be generated for each map and the
7362     // types below. The generated information is expressed in this order:
7363     // base pointer, section pointer, size, flags
7364     // (to add to the ones that come from the map type and modifier).
7365     //
7366     // double d;
7367     // int i[100];
7368     // float *p;
7369     //
7370     // struct S1 {
7371     //   int i;
7372     //   float f[50];
7373     // }
7374     // struct S2 {
7375     //   int i;
7376     //   float f[50];
7377     //   S1 s;
7378     //   double *p;
7379     //   struct S2 *ps;
7380     // }
7381     // S2 s;
7382     // S2 *ps;
7383     //
7384     // map(d)
7385     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7386     //
7387     // map(i)
7388     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7389     //
7390     // map(i[1:23])
7391     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7392     //
7393     // map(p)
7394     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7395     //
7396     // map(p[1:24])
7397     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7398     //
7399     // map(s)
7400     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7401     //
7402     // map(s.i)
7403     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7404     //
7405     // map(s.s.f)
7406     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7407     //
7408     // map(s.p)
7409     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7410     //
7411     // map(to: s.p[:22])
7412     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7413     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7414     // &(s.p), &(s.p[0]), 22*sizeof(double),
7415     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7416     // (*) alloc space for struct members, only this is a target parameter
7417     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7418     //      optimizes this entry out, same in the examples below)
7419     // (***) map the pointee (map: to)
7420     //
7421     // map(s.ps)
7422     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7423     //
7424     // map(from: s.ps->s.i)
7425     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7426     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7427     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7428     //
7429     // map(to: s.ps->ps)
7430     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7431     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7432     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7433     //
7434     // map(s.ps->ps->ps)
7435     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7436     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7437     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7438     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7439     //
7440     // map(to: s.ps->ps->s.f[:22])
7441     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7442     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7443     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7444     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7445     //
7446     // map(ps)
7447     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7448     //
7449     // map(ps->i)
7450     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7451     //
7452     // map(ps->s.f)
7453     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7454     //
7455     // map(from: ps->p)
7456     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7457     //
7458     // map(to: ps->p[:22])
7459     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7460     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7461     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7462     //
7463     // map(ps->ps)
7464     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7465     //
7466     // map(from: ps->ps->s.i)
7467     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7468     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7469     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7470     //
7471     // map(from: ps->ps->ps)
7472     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7473     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7474     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7475     //
7476     // map(ps->ps->ps->ps)
7477     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7478     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7479     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7480     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7481     //
7482     // map(to: ps->ps->ps->s.f[:22])
7483     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7484     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7485     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7486     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7487     //
7488     // map(to: s.f[:22]) map(from: s.p[:33])
7489     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7490     //     sizeof(double*) (**), TARGET_PARAM
7491     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7492     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7493     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7494     // (*) allocate contiguous space needed to fit all mapped members even if
7495     //     we allocate space for members not mapped (in this example,
7496     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7497     //     them as well because they fall between &s.f[0] and &s.p)
7498     //
7499     // map(from: s.f[:22]) map(to: ps->p[:33])
7500     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7501     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7502     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7503     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7504     // (*) the struct this entry pertains to is the 2nd element in the list of
7505     //     arguments, hence MEMBER_OF(2)
7506     //
7507     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7508     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7509     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7510     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7511     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7512     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7513     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7514     // (*) the struct this entry pertains to is the 4th element in the list
7515     //     of arguments, hence MEMBER_OF(4)
7516 
7517     // Track if the map information being generated is the first for a capture.
7518     bool IsCaptureFirstInfo = IsFirstComponentList;
7519     // When the variable is on a declare target link or in a to clause with
7520     // unified memory, a reference is needed to hold the host/device address
7521     // of the variable.
7522     bool RequiresReference = false;
7523 
7524     // Scan the components from the base to the complete expression.
7525     auto CI = Components.rbegin();
7526     auto CE = Components.rend();
7527     auto I = CI;
7528 
7529     // Track if the map information being generated is the first for a list of
7530     // components.
7531     bool IsExpressionFirstInfo = true;
7532     Address BP = Address::invalid();
7533     const Expr *AssocExpr = I->getAssociatedExpression();
7534     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7535     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7536 
7537     if (isa<MemberExpr>(AssocExpr)) {
7538       // The base is the 'this' pointer. The content of the pointer is going
7539       // to be the base of the field being mapped.
7540       BP = CGF.LoadCXXThisAddress();
7541     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7542                (OASE &&
7543                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7544       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7545     } else {
7546       // The base is the reference to the variable.
7547       // BP = &Var.
7548       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7549       if (const auto *VD =
7550               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7551         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7552                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7553           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7554               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7555                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7556             RequiresReference = true;
7557             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7558           }
7559         }
7560       }
7561 
7562       // If the variable is a pointer and is being dereferenced (i.e. is not
7563       // the last component), the base has to be the pointer itself, not its
7564       // reference. References are ignored for mapping purposes.
7565       QualType Ty =
7566           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7567       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7568         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7569 
7570         // We do not need to generate individual map information for the
7571         // pointer, it can be associated with the combined storage.
7572         ++I;
7573       }
7574     }
7575 
7576     // Track whether a component of the list should be marked as MEMBER_OF some
7577     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7578     // in a component list should be marked as MEMBER_OF, all subsequent entries
7579     // do not belong to the base struct. E.g.
7580     // struct S2 s;
7581     // s.ps->ps->ps->f[:]
7582     //   (1) (2) (3) (4)
7583     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7584     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7585     // is the pointee of ps(2) which is not member of struct s, so it should not
7586     // be marked as such (it is still PTR_AND_OBJ).
7587     // The variable is initialized to false so that PTR_AND_OBJ entries which
7588     // are not struct members are not considered (e.g. array of pointers to
7589     // data).
7590     bool ShouldBeMemberOf = false;
7591 
7592     // Variable keeping track of whether or not we have encountered a component
7593     // in the component list which is a member expression. Useful when we have a
7594     // pointer or a final array section, in which case it is the previous
7595     // component in the list which tells us whether we have a member expression.
7596     // E.g. X.f[:]
7597     // While processing the final array section "[:]" it is "f" which tells us
7598     // whether we are dealing with a member of a declared struct.
7599     const MemberExpr *EncounteredME = nullptr;
7600 
7601     for (; I != CE; ++I) {
7602       // If the current component is member of a struct (parent struct) mark it.
7603       if (!EncounteredME) {
7604         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7605         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7606         // as MEMBER_OF the parent struct.
7607         if (EncounteredME)
7608           ShouldBeMemberOf = true;
7609       }
7610 
7611       auto Next = std::next(I);
7612 
7613       // We need to generate the addresses and sizes if this is the last
7614       // component, if the component is a pointer or if it is an array section
7615       // whose length can't be proved to be one. If this is a pointer, it
7616       // becomes the base address for the following components.
7617 
7618       // A final array section, is one whose length can't be proved to be one.
7619       bool IsFinalArraySection =
7620           isFinalArraySectionExpression(I->getAssociatedExpression());
7621 
7622       // Get information on whether the element is a pointer. Have to do a
7623       // special treatment for array sections given that they are built-in
7624       // types.
7625       const auto *OASE =
7626           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7627       bool IsPointer =
7628           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7629                        .getCanonicalType()
7630                        ->isAnyPointerType()) ||
7631           I->getAssociatedExpression()->getType()->isAnyPointerType();
7632 
7633       if (Next == CE || IsPointer || IsFinalArraySection) {
7634         // If this is not the last component, we expect the pointer to be
7635         // associated with an array expression or member expression.
7636         assert((Next == CE ||
7637                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7638                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7639                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
7640                "Unexpected expression");
7641 
7642         Address LB =
7643             CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
7644 
7645         // If this component is a pointer inside the base struct then we don't
7646         // need to create any entry for it - it will be combined with the object
7647         // it is pointing to into a single PTR_AND_OBJ entry.
7648         bool IsMemberPointer =
7649             IsPointer && EncounteredME &&
7650             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7651              EncounteredME);
7652         if (!OverlappedElements.empty()) {
7653           // Handle base element with the info for overlapped elements.
7654           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7655           assert(Next == CE &&
7656                  "Expected last element for the overlapped elements.");
7657           assert(!IsPointer &&
7658                  "Unexpected base element with the pointer type.");
7659           // Mark the whole struct as the struct that requires allocation on the
7660           // device.
7661           PartialStruct.LowestElem = {0, LB};
7662           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7663               I->getAssociatedExpression()->getType());
7664           Address HB = CGF.Builder.CreateConstGEP(
7665               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7666                                                               CGF.VoidPtrTy),
7667               TypeSize.getQuantity() - 1);
7668           PartialStruct.HighestElem = {
7669               std::numeric_limits<decltype(
7670                   PartialStruct.HighestElem.first)>::max(),
7671               HB};
7672           PartialStruct.Base = BP;
7673           // Emit data for non-overlapped data.
7674           OpenMPOffloadMappingFlags Flags =
7675               OMP_MAP_MEMBER_OF |
7676               getMapTypeBits(MapType, MapModifiers, IsImplicit,
7677                              /*AddPtrFlag=*/false,
7678                              /*AddIsTargetParamFlag=*/false);
7679           LB = BP;
7680           llvm::Value *Size = nullptr;
7681           // Do bitcopy of all non-overlapped structure elements.
7682           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7683                    Component : OverlappedElements) {
7684             Address ComponentLB = Address::invalid();
7685             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7686                  Component) {
7687               if (MC.getAssociatedDeclaration()) {
7688                 ComponentLB =
7689                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7690                         .getAddress();
7691                 Size = CGF.Builder.CreatePtrDiff(
7692                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7693                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7694                 break;
7695               }
7696             }
7697             BasePointers.push_back(BP.getPointer());
7698             Pointers.push_back(LB.getPointer());
7699             Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
7700                                                       /*isSigned=*/true));
7701             Types.push_back(Flags);
7702             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7703           }
7704           BasePointers.push_back(BP.getPointer());
7705           Pointers.push_back(LB.getPointer());
7706           Size = CGF.Builder.CreatePtrDiff(
7707               CGF.EmitCastToVoidPtr(
7708                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7709               CGF.EmitCastToVoidPtr(LB.getPointer()));
7710           Sizes.push_back(
7711               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7712           Types.push_back(Flags);
7713           break;
7714         }
7715         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7716         if (!IsMemberPointer) {
7717           BasePointers.push_back(BP.getPointer());
7718           Pointers.push_back(LB.getPointer());
7719           Sizes.push_back(
7720               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7721 
7722           // We need to add a pointer flag for each map that comes from the
7723           // same expression except for the first one. We also need to signal
7724           // this map is the first one that relates with the current capture
7725           // (there is a set of entries for each capture).
7726           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7727               MapType, MapModifiers, IsImplicit,
7728               !IsExpressionFirstInfo || RequiresReference,
7729               IsCaptureFirstInfo && !RequiresReference);
7730 
7731           if (!IsExpressionFirstInfo) {
7732             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7733             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7734             if (IsPointer)
7735               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7736                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7737 
7738             if (ShouldBeMemberOf) {
7739               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7740               // should be later updated with the correct value of MEMBER_OF.
7741               Flags |= OMP_MAP_MEMBER_OF;
7742               // From now on, all subsequent PTR_AND_OBJ entries should not be
7743               // marked as MEMBER_OF.
7744               ShouldBeMemberOf = false;
7745             }
7746           }
7747 
7748           Types.push_back(Flags);
7749         }
7750 
7751         // If we have encountered a member expression so far, keep track of the
7752         // mapped member. If the parent is "*this", then the value declaration
7753         // is nullptr.
7754         if (EncounteredME) {
7755           const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
7756           unsigned FieldIndex = FD->getFieldIndex();
7757 
7758           // Update info about the lowest and highest elements for this struct
7759           if (!PartialStruct.Base.isValid()) {
7760             PartialStruct.LowestElem = {FieldIndex, LB};
7761             PartialStruct.HighestElem = {FieldIndex, LB};
7762             PartialStruct.Base = BP;
7763           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7764             PartialStruct.LowestElem = {FieldIndex, LB};
7765           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7766             PartialStruct.HighestElem = {FieldIndex, LB};
7767           }
7768         }
7769 
7770         // If we have a final array section, we are done with this expression.
7771         if (IsFinalArraySection)
7772           break;
7773 
7774         // The pointer becomes the base for the next element.
7775         if (Next != CE)
7776           BP = LB;
7777 
7778         IsExpressionFirstInfo = false;
7779         IsCaptureFirstInfo = false;
7780       }
7781     }
7782   }
7783 
7784   /// Return the adjusted map modifiers if the declaration a capture refers to
7785   /// appears in a first-private clause. This is expected to be used only with
7786   /// directives that start with 'target'.
7787   MappableExprsHandler::OpenMPOffloadMappingFlags
7788   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7789     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7790 
7791     // A first private variable captured by reference will use only the
7792     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7793     // declaration is known as first-private in this handler.
7794     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7795       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7796           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7797         return MappableExprsHandler::OMP_MAP_ALWAYS |
7798                MappableExprsHandler::OMP_MAP_TO;
7799       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7800         return MappableExprsHandler::OMP_MAP_TO |
7801                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7802       return MappableExprsHandler::OMP_MAP_PRIVATE |
7803              MappableExprsHandler::OMP_MAP_TO;
7804     }
7805     return MappableExprsHandler::OMP_MAP_TO |
7806            MappableExprsHandler::OMP_MAP_FROM;
7807   }
7808 
7809   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7810     // Rotate by getFlagMemberOffset() bits.
7811     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7812                                                   << getFlagMemberOffset());
7813   }
7814 
7815   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7816                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7817     // If the entry is PTR_AND_OBJ but has not been marked with the special
7818     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7819     // marked as MEMBER_OF.
7820     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7821         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7822       return;
7823 
7824     // Reset the placeholder value to prepare the flag for the assignment of the
7825     // proper MEMBER_OF value.
7826     Flags &= ~OMP_MAP_MEMBER_OF;
7827     Flags |= MemberOfFlag;
7828   }
7829 
7830   void getPlainLayout(const CXXRecordDecl *RD,
7831                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7832                       bool AsBase) const {
7833     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7834 
7835     llvm::StructType *St =
7836         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7837 
7838     unsigned NumElements = St->getNumElements();
7839     llvm::SmallVector<
7840         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7841         RecordLayout(NumElements);
7842 
7843     // Fill bases.
7844     for (const auto &I : RD->bases()) {
7845       if (I.isVirtual())
7846         continue;
7847       const auto *Base = I.getType()->getAsCXXRecordDecl();
7848       // Ignore empty bases.
7849       if (Base->isEmpty() || CGF.getContext()
7850                                  .getASTRecordLayout(Base)
7851                                  .getNonVirtualSize()
7852                                  .isZero())
7853         continue;
7854 
7855       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7856       RecordLayout[FieldIndex] = Base;
7857     }
7858     // Fill in virtual bases.
7859     for (const auto &I : RD->vbases()) {
7860       const auto *Base = I.getType()->getAsCXXRecordDecl();
7861       // Ignore empty bases.
7862       if (Base->isEmpty())
7863         continue;
7864       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7865       if (RecordLayout[FieldIndex])
7866         continue;
7867       RecordLayout[FieldIndex] = Base;
7868     }
7869     // Fill in all the fields.
7870     assert(!RD->isUnion() && "Unexpected union.");
7871     for (const auto *Field : RD->fields()) {
7872       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7873       // will fill in later.)
7874       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7875         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7876         RecordLayout[FieldIndex] = Field;
7877       }
7878     }
7879     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7880              &Data : RecordLayout) {
7881       if (Data.isNull())
7882         continue;
7883       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7884         getPlainLayout(Base, Layout, /*AsBase=*/true);
7885       else
7886         Layout.push_back(Data.get<const FieldDecl *>());
7887     }
7888   }
7889 
7890 public:
7891   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7892       : CurDir(&Dir), CGF(CGF) {
7893     // Extract firstprivate clause information.
7894     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7895       for (const auto *D : C->varlists())
7896         FirstPrivateDecls.try_emplace(
7897             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7898     // Extract device pointer clause information.
7899     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7900       for (auto L : C->component_lists())
7901         DevPointersMap[L.first].push_back(L.second);
7902   }
7903 
  /// Constructor for the declare mapper directive. Only stores \p Dir and
  /// \p CGF; unlike the executable-directive constructor, no clause
  /// information is collected here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
7907 
7908   /// Generate code for the combined entry if we have a partially mapped struct
7909   /// and take care of the mapping flags of the arguments corresponding to
7910   /// individual struct members.
7911   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7912                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7913                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7914                          const StructRangeInfoTy &PartialStruct) const {
7915     // Base is the base of the struct
7916     BasePointers.push_back(PartialStruct.Base.getPointer());
7917     // Pointer is the address of the lowest element
7918     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7919     Pointers.push_back(LB);
7920     // Size is (addr of {highest+1} element) - (addr of lowest element)
7921     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7922     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7923     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7924     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7925     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7926     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7927                                                   /*isSigned=*/false);
7928     Sizes.push_back(Size);
7929     // Map type is always TARGET_PARAM
7930     Types.push_back(OMP_MAP_TARGET_PARAM);
7931     // Remove TARGET_PARAM flag from the first element
7932     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7933 
7934     // All other current entries will be MEMBER_OF the combined entry
7935     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7936     // 0xFFFF in the MEMBER_OF field).
7937     OpenMPOffloadMappingFlags MemberOfFlag =
7938         getMemberOfFlag(BasePointers.size() - 1);
7939     for (auto &M : CurTypes)
7940       setCorrectMemberOfFlag(M, MemberOfFlag);
7941   }
7942 
  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the mappable expressions found in the 'map', 'to', 'from' and
  /// 'use_device_ptr' clauses of the current executable directive. For each
  /// item that relates with a device pointer, the relevant declaration is
  /// recorded on the corresponding base pointer entry and the entry is flagged
  /// with OMP_MAP_RETURN_PARAM.
  ///
  /// \param BasePointers Receives the base pointer of every generated entry.
  /// \param Pointers Receives the section pointer of every generated entry.
  /// \param Sizes Receives the size of every generated entry.
  /// \param Types Receives the map type flags of every generated entry.
  ///
  /// The four arrays are only appended to; existing contents are preserved.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map. A MapVector is
    // used, so the declarations are later visited in insertion order.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Lists are keyed by the canonical declaration; a null
    // declaration is kept as a null key.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // 'map' clauses carry their own map type and modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    // 'to' clauses are recorded as map type 'to' without modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    // 'from' clauses are recorded as map type 'from' without modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto &L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD);
        } else {
          // Not a member: emit the zero-sized entry right away, directly into
          // the output arrays, using the loaded pointer value as both base and
          // section pointer.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    // Emit the entries one declaration at a time.
    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays. Entries for this declaration are
      // collected here first so that a combined entry, if any, can be placed
      // in the output arrays ahead of them.
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                     CurBasePointers, CurPointers, CurSizes,
                                     CurTypes, PartialStruct,
                                     IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag. Only the first entry
        // generated for this component list is updated.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          // The base is the address of the member itself; the section pointer
          // is the pointer value loaded from it.
          // NOTE(review): EmitLValue(L.IE) is invoked twice here; confirm the
          // duplicated emission is intentional.
          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                             OMP_MAP_MEMBER_OF);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry. It is written
      // to the output arrays before the per-member entries are appended below.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }
8121 
8122   /// Generate all the base pointers, section pointers, sizes and map types for
8123   /// the extracted map clauses of user-defined mapper.
8124   void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8125                                 MapValuesArrayTy &Pointers,
8126                                 MapValuesArrayTy &Sizes,
8127                                 MapFlagsArrayTy &Types) const {
8128     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8129            "Expect a declare mapper directive");
8130     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8131     // We have to process the component lists that relate with the same
8132     // declaration in a single chunk so that we can generate the map flags
8133     // correctly. Therefore, we organize all lists in a map.
8134     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8135 
8136     // Helper function to fill the information map for the different supported
8137     // clauses.
8138     auto &&InfoGen = [&Info](
8139         const ValueDecl *D,
8140         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8141         OpenMPMapClauseKind MapType,
8142         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8143         bool ReturnDevicePointer, bool IsImplicit) {
8144       const ValueDecl *VD =
8145           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8146       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8147                             IsImplicit);
8148     };
8149 
8150     for (const auto *C : CurMapperDir->clauselists()) {
8151       const auto *MC = cast<OMPMapClause>(C);
8152       for (const auto &L : MC->component_lists()) {
8153         InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8154                 /*ReturnDevicePointer=*/false, MC->isImplicit());
8155       }
8156     }
8157 
8158     for (const auto &M : Info) {
8159       // We need to know when we generate information for the first component
8160       // associated with a capture, because the mapping flags depend on it.
8161       bool IsFirstComponentList = true;
8162 
8163       // Temporary versions of arrays
8164       MapBaseValuesArrayTy CurBasePointers;
8165       MapValuesArrayTy CurPointers;
8166       MapValuesArrayTy CurSizes;
8167       MapFlagsArrayTy CurTypes;
8168       StructRangeInfoTy PartialStruct;
8169 
8170       for (const MapInfo &L : M.second) {
8171         assert(!L.Components.empty() &&
8172                "Not expecting declaration with no component lists.");
8173         generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8174                                      CurBasePointers, CurPointers, CurSizes,
8175                                      CurTypes, PartialStruct,
8176                                      IsFirstComponentList, L.IsImplicit);
8177         IsFirstComponentList = false;
8178       }
8179 
8180       // If there is an entry in PartialStruct it means we have a struct with
8181       // individual members mapped. Emit an extra combined entry.
8182       if (PartialStruct.Base.isValid())
8183         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8184                           PartialStruct);
8185 
8186       // We need to append the results of this capture to what we already have.
8187       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8188       Pointers.append(CurPointers.begin(), CurPointers.end());
8189       Sizes.append(CurSizes.begin(), CurSizes.end());
8190       Types.append(CurTypes.begin(), CurTypes.end());
8191     }
8192   }
8193 
8194   /// Emit capture info for lambdas for variables captured by reference.
8195   void generateInfoForLambdaCaptures(
8196       const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8197       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8198       MapFlagsArrayTy &Types,
8199       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8200     const auto *RD = VD->getType()
8201                          .getCanonicalType()
8202                          .getNonReferenceType()
8203                          ->getAsCXXRecordDecl();
8204     if (!RD || !RD->isLambda())
8205       return;
8206     Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8207     LValue VDLVal = CGF.MakeAddrLValue(
8208         VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8209     llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8210     FieldDecl *ThisCapture = nullptr;
8211     RD->getCaptureFields(Captures, ThisCapture);
8212     if (ThisCapture) {
8213       LValue ThisLVal =
8214           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8215       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8216       LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
8217       BasePointers.push_back(ThisLVal.getPointer());
8218       Pointers.push_back(ThisLValVal.getPointer());
8219       Sizes.push_back(
8220           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8221                                     CGF.Int64Ty, /*isSigned=*/true));
8222       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8223                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8224     }
8225     for (const LambdaCapture &LC : RD->captures()) {
8226       if (!LC.capturesVariable())
8227         continue;
8228       const VarDecl *VD = LC.getCapturedVar();
8229       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8230         continue;
8231       auto It = Captures.find(VD);
8232       assert(It != Captures.end() && "Found lambda capture without field.");
8233       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8234       if (LC.getCaptureKind() == LCK_ByRef) {
8235         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8236         LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
8237         BasePointers.push_back(VarLVal.getPointer());
8238         Pointers.push_back(VarLValVal.getPointer());
8239         Sizes.push_back(CGF.Builder.CreateIntCast(
8240             CGF.getTypeSize(
8241                 VD->getType().getCanonicalType().getNonReferenceType()),
8242             CGF.Int64Ty, /*isSigned=*/true));
8243       } else {
8244         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8245         LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
8246         BasePointers.push_back(VarLVal.getPointer());
8247         Pointers.push_back(VarRVal.getScalarVal());
8248         Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8249       }
8250       Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8251                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8252     }
8253   }
8254 
8255   /// Set correct indices for lambdas captures.
8256   void adjustMemberOfForLambdaCaptures(
8257       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8258       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8259       MapFlagsArrayTy &Types) const {
8260     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8261       // Set correct member_of idx for all implicit lambda captures.
8262       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8263                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8264         continue;
8265       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8266       assert(BasePtr && "Unable to find base lambda address.");
8267       int TgtIdx = -1;
8268       for (unsigned J = I; J > 0; --J) {
8269         unsigned Idx = J - 1;
8270         if (Pointers[Idx] != BasePtr)
8271           continue;
8272         TgtIdx = Idx;
8273         break;
8274       }
8275       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8276       // All other current entries will be MEMBER_OF the combined entry
8277       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8278       // 0xFFFF in the MEMBER_OF field).
8279       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8280       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8281     }
8282   }
8283 
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture, based on the 'map' clauses of the current
  /// executable directive that refer to the captured declaration.
  ///
  /// \param Cap The capture to process; must not capture a variable array
  /// type.
  /// \param Arg The value passed for the capture; used directly when the
  /// declaration is found in DevPointersMap.
  /// \param BasePointers Receives the base pointer of every generated entry.
  /// \param Pointers Receives the section pointer of every generated entry.
  /// \param Sizes Receives the size of every generated entry.
  /// \param Types Receives the map type flags of every generated entry.
  /// \param PartialStruct Forwarded to generateInfoForComponentList for every
  /// component list processed.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // A 'this' capture is represented by a null declaration; otherwise use the
    // canonical declaration of the captured variable.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // One record per component list of VD: the components, the map type, the
    // map type modifiers and whether the clause is implicit.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // The map is keyed by the address of the base element inside
    // DeclComponentLists, which is not modified from here on.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      // Compare L only against the lists that follow it, so that every pair is
      // examined once.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        // Only Components1 is needed below; the remaining fields are unpacked
        // into the outer variables, which are not read in this loop.
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        // Walk both lists from their last component towards their first,
        // stopping at the first component that differs in expression class or
        // in associated declaration.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The list that was fully consumed is the base; the other one is
          // recorded as overlapped by it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // NOTE(review): VD is null for a 'this' capture and is dereferenced below
    // whenever OverlappedData is not empty; likewise RD is used without a null
    // check. Confirm neither case can occur here.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      // Ordering used: lists are compared component by component starting from
      // their last component; a list that is exhausted first sorts first, and
      // otherwise the first differing fields decide.
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Both lists diverge at a field. Fields of the same parent are
            // ordered by field index; otherwise the field that appears first
            // in Layout comes first.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // NOTE(review): It is dereferenced without checking against
            // Layout.end(); presumably one of the two fields is always present
            // in Layout - confirm.
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Go through all of the elements with the overlapped elements. Each of
    // them is generated as a first component list, because the mapping flags
    // depend on it.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements. The first-list
    // flag starts out set only if nothing was emitted above, and it is cleared
    // after the first iteration whether or not that list was emitted here.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      // Lists that act as a base in OverlappedData were already emitted above.
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
8461 
8462   /// Generate the base pointers, section pointers, sizes and map types
8463   /// associated with the declare target link variables.
8464   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8465                                         MapValuesArrayTy &Pointers,
8466                                         MapValuesArrayTy &Sizes,
8467                                         MapFlagsArrayTy &Types) const {
8468     assert(CurDir.is<const OMPExecutableDirective *>() &&
8469            "Expect a executable directive");
8470     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8471     // Map other list items in the map clause which are not captured variables
8472     // but "declare target link" global variables.
8473     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8474       for (const auto &L : C->component_lists()) {
8475         if (!L.first)
8476           continue;
8477         const auto *VD = dyn_cast<VarDecl>(L.first);
8478         if (!VD)
8479           continue;
8480         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8481             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8482         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8483             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8484           continue;
8485         StructRangeInfoTy PartialStruct;
8486         generateInfoForComponentList(
8487             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8488             Pointers, Sizes, Types, PartialStruct,
8489             /*IsFirstComponentList=*/true, C->isImplicit());
8490         assert(!PartialStruct.Base.isValid() &&
8491                "No partial structs for declare target link expected.");
8492       }
8493     }
8494   }
8495 
8496   /// Generate the default map information for a given capture \a CI,
8497   /// record field declaration \a RI and captured value \a CV.
8498   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8499                               const FieldDecl &RI, llvm::Value *CV,
8500                               MapBaseValuesArrayTy &CurBasePointers,
8501                               MapValuesArrayTy &CurPointers,
8502                               MapValuesArrayTy &CurSizes,
8503                               MapFlagsArrayTy &CurMapTypes) const {
8504     bool IsImplicit = true;
8505     // Do the default mapping.
8506     if (CI.capturesThis()) {
8507       CurBasePointers.push_back(CV);
8508       CurPointers.push_back(CV);
8509       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8510       CurSizes.push_back(
8511           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8512                                     CGF.Int64Ty, /*isSigned=*/true));
8513       // Default map type.
8514       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8515     } else if (CI.capturesVariableByCopy()) {
8516       CurBasePointers.push_back(CV);
8517       CurPointers.push_back(CV);
8518       if (!RI.getType()->isAnyPointerType()) {
8519         // We have to signal to the runtime captures passed by value that are
8520         // not pointers.
8521         CurMapTypes.push_back(OMP_MAP_LITERAL);
8522         CurSizes.push_back(CGF.Builder.CreateIntCast(
8523             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8524       } else {
8525         // Pointers are implicitly mapped with a zero size and no flags
8526         // (other than first map that is added for all implicit maps).
8527         CurMapTypes.push_back(OMP_MAP_NONE);
8528         CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8529       }
8530       const VarDecl *VD = CI.getCapturedVar();
8531       auto I = FirstPrivateDecls.find(VD);
8532       if (I != FirstPrivateDecls.end())
8533         IsImplicit = I->getSecond();
8534     } else {
8535       assert(CI.capturesVariable() && "Expected captured reference.");
8536       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8537       QualType ElementType = PtrTy->getPointeeType();
8538       CurSizes.push_back(CGF.Builder.CreateIntCast(
8539           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8540       // The default map type for a scalar/complex type is 'to' because by
8541       // default the value doesn't have to be retrieved. For an aggregate
8542       // type, the default is 'tofrom'.
8543       CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
8544       const VarDecl *VD = CI.getCapturedVar();
8545       auto I = FirstPrivateDecls.find(VD);
8546       if (I != FirstPrivateDecls.end() &&
8547           VD->getType().isConstant(CGF.getContext())) {
8548         llvm::Constant *Addr =
8549             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8550         // Copy the value of the original variable to the new global copy.
8551         CGF.Builder.CreateMemCpy(
8552             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
8553             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8554             CurSizes.back(), /*IsVolatile=*/false);
8555         // Use new global variable as the base pointers.
8556         CurBasePointers.push_back(Addr);
8557         CurPointers.push_back(Addr);
8558       } else {
8559         CurBasePointers.push_back(CV);
8560         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8561           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8562               CV, ElementType, CGF.getContext().getDeclAlign(VD),
8563               AlignmentSource::Decl));
8564           CurPointers.push_back(PtrAddr.getPointer());
8565         } else {
8566           CurPointers.push_back(CV);
8567         }
8568       }
8569       if (I != FirstPrivateDecls.end())
8570         IsImplicit = I->getSecond();
8571     }
8572     // Every default map produces a single argument which is a target parameter.
8573     CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
8574 
8575     // Add flag stating this is an implicit map.
8576     if (IsImplicit)
8577       CurMapTypes.back() |= OMP_MAP_IMPLICIT;
8578   }
8579 };
8580 } // anonymous namespace
8581 
8582 /// Emit the arrays used to pass the captures and map information to the
8583 /// offloading runtime library. If there is no map or capture information,
8584 /// return nullptr by reference.
8585 static void
8586 emitOffloadingArrays(CodeGenFunction &CGF,
8587                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
8588                      MappableExprsHandler::MapValuesArrayTy &Pointers,
8589                      MappableExprsHandler::MapValuesArrayTy &Sizes,
8590                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
8591                      CGOpenMPRuntime::TargetDataInfo &Info) {
8592   CodeGenModule &CGM = CGF.CGM;
8593   ASTContext &Ctx = CGF.getContext();
8594 
8595   // Reset the array information.
8596   Info.clearArrayInfo();
8597   Info.NumberOfPtrs = BasePointers.size();
8598 
8599   if (Info.NumberOfPtrs) {
8600     // Detect if we have any capture size requiring runtime evaluation of the
8601     // size so that a constant array could be eventually used.
8602     bool hasRuntimeEvaluationCaptureSize = false;
8603     for (llvm::Value *S : Sizes)
8604       if (!isa<llvm::Constant>(S)) {
8605         hasRuntimeEvaluationCaptureSize = true;
8606         break;
8607       }
8608 
8609     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8610     QualType PointerArrayType =
8611         Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
8612                                  /*IndexTypeQuals=*/0);
8613 
8614     Info.BasePointersArray =
8615         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8616     Info.PointersArray =
8617         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8618 
8619     // If we don't have any VLA types or other types that require runtime
8620     // evaluation, we can use a constant array for the map sizes, otherwise we
8621     // need to fill up the arrays as we do for the pointers.
8622     QualType Int64Ty =
8623         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8624     if (hasRuntimeEvaluationCaptureSize) {
8625       QualType SizeArrayType =
8626           Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal,
8627                                    /*IndexTypeQuals=*/0);
8628       Info.SizesArray =
8629           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8630     } else {
8631       // We expect all the sizes to be constant, so we collect them to create
8632       // a constant array.
8633       SmallVector<llvm::Constant *, 16> ConstSizes;
8634       for (llvm::Value *S : Sizes)
8635         ConstSizes.push_back(cast<llvm::Constant>(S));
8636 
8637       auto *SizesArrayInit = llvm::ConstantArray::get(
8638           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8639       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8640       auto *SizesArrayGbl = new llvm::GlobalVariable(
8641           CGM.getModule(), SizesArrayInit->getType(),
8642           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8643           SizesArrayInit, Name);
8644       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8645       Info.SizesArray = SizesArrayGbl;
8646     }
8647 
8648     // The map types are always constant so we don't need to generate code to
8649     // fill arrays. Instead, we create an array constant.
8650     SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8651     llvm::copy(MapTypes, Mapping.begin());
8652     llvm::Constant *MapTypesArrayInit =
8653         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8654     std::string MaptypesName =
8655         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8656     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8657         CGM.getModule(), MapTypesArrayInit->getType(),
8658         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8659         MapTypesArrayInit, MaptypesName);
8660     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8661     Info.MapTypesArray = MapTypesArrayGbl;
8662 
8663     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8664       llvm::Value *BPVal = *BasePointers[I];
8665       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8666           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8667           Info.BasePointersArray, 0, I);
8668       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8669           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8670       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8671       CGF.Builder.CreateStore(BPVal, BPAddr);
8672 
8673       if (Info.requiresDevicePointerInfo())
8674         if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8675           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8676 
8677       llvm::Value *PVal = Pointers[I];
8678       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8679           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8680           Info.PointersArray, 0, I);
8681       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8682           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8683       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8684       CGF.Builder.CreateStore(PVal, PAddr);
8685 
8686       if (hasRuntimeEvaluationCaptureSize) {
8687         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8688             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8689             Info.SizesArray,
8690             /*Idx0=*/0,
8691             /*Idx1=*/I);
8692         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8693         CGF.Builder.CreateStore(
8694             CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8695             SAddr);
8696       }
8697     }
8698   }
8699 }
8700 
8701 /// Emit the arguments to be passed to the runtime library based on the
8702 /// arrays of pointers, sizes and map types.
8703 static void emitOffloadingArraysArgument(
8704     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8705     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8706     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8707   CodeGenModule &CGM = CGF.CGM;
8708   if (Info.NumberOfPtrs) {
8709     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8710         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8711         Info.BasePointersArray,
8712         /*Idx0=*/0, /*Idx1=*/0);
8713     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8714         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8715         Info.PointersArray,
8716         /*Idx0=*/0,
8717         /*Idx1=*/0);
8718     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8719         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8720         /*Idx0=*/0, /*Idx1=*/0);
8721     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8722         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8723         Info.MapTypesArray,
8724         /*Idx0=*/0,
8725         /*Idx1=*/0);
8726   } else {
8727     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8728     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8729     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8730     MapTypesArrayArg =
8731         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8732   }
8733 }
8734 
8735 /// Check for inner distribute directive.
8736 static const OMPExecutableDirective *
8737 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8738   const auto *CS = D.getInnermostCapturedStmt();
8739   const auto *Body =
8740       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8741   const Stmt *ChildStmt =
8742       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8743 
8744   if (const auto *NestedDir =
8745           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8746     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8747     switch (D.getDirectiveKind()) {
8748     case OMPD_target:
8749       if (isOpenMPDistributeDirective(DKind))
8750         return NestedDir;
8751       if (DKind == OMPD_teams) {
8752         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8753             /*IgnoreCaptured=*/true);
8754         if (!Body)
8755           return nullptr;
8756         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8757         if (const auto *NND =
8758                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8759           DKind = NND->getDirectiveKind();
8760           if (isOpenMPDistributeDirective(DKind))
8761             return NND;
8762         }
8763       }
8764       return nullptr;
8765     case OMPD_target_teams:
8766       if (isOpenMPDistributeDirective(DKind))
8767         return NestedDir;
8768       return nullptr;
8769     case OMPD_target_parallel:
8770     case OMPD_target_simd:
8771     case OMPD_target_parallel_for:
8772     case OMPD_target_parallel_for_simd:
8773       return nullptr;
8774     case OMPD_target_teams_distribute:
8775     case OMPD_target_teams_distribute_simd:
8776     case OMPD_target_teams_distribute_parallel_for:
8777     case OMPD_target_teams_distribute_parallel_for_simd:
8778     case OMPD_parallel:
8779     case OMPD_for:
8780     case OMPD_parallel_for:
8781     case OMPD_parallel_sections:
8782     case OMPD_for_simd:
8783     case OMPD_parallel_for_simd:
8784     case OMPD_cancel:
8785     case OMPD_cancellation_point:
8786     case OMPD_ordered:
8787     case OMPD_threadprivate:
8788     case OMPD_allocate:
8789     case OMPD_task:
8790     case OMPD_simd:
8791     case OMPD_sections:
8792     case OMPD_section:
8793     case OMPD_single:
8794     case OMPD_master:
8795     case OMPD_critical:
8796     case OMPD_taskyield:
8797     case OMPD_barrier:
8798     case OMPD_taskwait:
8799     case OMPD_taskgroup:
8800     case OMPD_atomic:
8801     case OMPD_flush:
8802     case OMPD_teams:
8803     case OMPD_target_data:
8804     case OMPD_target_exit_data:
8805     case OMPD_target_enter_data:
8806     case OMPD_distribute:
8807     case OMPD_distribute_simd:
8808     case OMPD_distribute_parallel_for:
8809     case OMPD_distribute_parallel_for_simd:
8810     case OMPD_teams_distribute:
8811     case OMPD_teams_distribute_simd:
8812     case OMPD_teams_distribute_parallel_for:
8813     case OMPD_teams_distribute_parallel_for_simd:
8814     case OMPD_target_update:
8815     case OMPD_declare_simd:
8816     case OMPD_declare_target:
8817     case OMPD_end_declare_target:
8818     case OMPD_declare_reduction:
8819     case OMPD_declare_mapper:
8820     case OMPD_taskloop:
8821     case OMPD_taskloop_simd:
8822     case OMPD_requires:
8823     case OMPD_unknown:
8824       llvm_unreachable("Unexpected directive.");
8825     }
8826   }
8827 
8828   return nullptr;
8829 }
8830 
8831 /// Emit the user-defined mapper function. The code generation follows the
8832 /// pattern in the example below.
8833 /// \code
8834 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8835 ///                                           void *base, void *begin,
8836 ///                                           int64_t size, int64_t type) {
8837 ///   // Allocate space for an array section first.
8838 ///   if (size > 1 && !maptype.IsDelete)
8839 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8840 ///                                 size*sizeof(Ty), clearToFrom(type));
8841 ///   // Map members.
8842 ///   for (unsigned i = 0; i < size; i++) {
8843 ///     // For each component specified by this mapper:
8844 ///     for (auto c : all_components) {
8845 ///       if (c.hasMapper())
8846 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8847 ///                       c.arg_type);
8848 ///       else
8849 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
8850 ///                                     c.arg_begin, c.arg_size, c.arg_type);
8851 ///     }
8852 ///   }
8853 ///   // Delete the array section.
8854 ///   if (size > 1 && maptype.IsDelete)
8855 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8856 ///                                 size*sizeof(Ty), clearToFrom(type));
8857 /// }
8858 /// \endcode
8859 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
8860                                             CodeGenFunction *CGF) {
8861   if (UDMMap.count(D) > 0)
8862     return;
8863   ASTContext &C = CGM.getContext();
8864   QualType Ty = D->getType();
8865   QualType PtrTy = C.getPointerType(Ty).withRestrict();
8866   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
8867   auto *MapperVarDecl =
8868       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
8869   SourceLocation Loc = D->getLocation();
8870   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
8871 
8872   // Prepare mapper function arguments and attributes.
8873   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8874                               C.VoidPtrTy, ImplicitParamDecl::Other);
8875   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
8876                             ImplicitParamDecl::Other);
8877   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8878                              C.VoidPtrTy, ImplicitParamDecl::Other);
8879   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8880                             ImplicitParamDecl::Other);
8881   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8882                             ImplicitParamDecl::Other);
8883   FunctionArgList Args;
8884   Args.push_back(&HandleArg);
8885   Args.push_back(&BaseArg);
8886   Args.push_back(&BeginArg);
8887   Args.push_back(&SizeArg);
8888   Args.push_back(&TypeArg);
8889   const CGFunctionInfo &FnInfo =
8890       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
8891   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
8892   SmallString<64> TyStr;
8893   llvm::raw_svector_ostream Out(TyStr);
8894   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
8895   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
8896   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
8897                                     Name, &CGM.getModule());
8898   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
8899   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
8900   // Start the mapper function code generation.
8901   CodeGenFunction MapperCGF(CGM);
8902   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
8903   // Compute the starting and end addreses of array elements.
8904   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
8905       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
8906       C.getPointerType(Int64Ty), Loc);
8907   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
8908       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
8909       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
8910   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
8911   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
8912       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
8913       C.getPointerType(Int64Ty), Loc);
8914   // Prepare common arguments for array initiation and deletion.
8915   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
8916       MapperCGF.GetAddrOfLocalVar(&HandleArg),
8917       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8918   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
8919       MapperCGF.GetAddrOfLocalVar(&BaseArg),
8920       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8921   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
8922       MapperCGF.GetAddrOfLocalVar(&BeginArg),
8923       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8924 
8925   // Emit array initiation if this is an array section and \p MapType indicates
8926   // that memory allocation is required.
8927   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
8928   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
8929                              ElementSize, HeadBB, /*IsInit=*/true);
8930 
8931   // Emit a for loop to iterate through SizeArg of elements and map all of them.
8932 
8933   // Emit the loop header block.
8934   MapperCGF.EmitBlock(HeadBB);
8935   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
8936   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
8937   // Evaluate whether the initial condition is satisfied.
8938   llvm::Value *IsEmpty =
8939       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
8940   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
8941   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
8942 
8943   // Emit the loop body block.
8944   MapperCGF.EmitBlock(BodyBB);
8945   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
8946       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
8947   PtrPHI->addIncoming(PtrBegin, EntryBB);
8948   Address PtrCurrent =
8949       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
8950                           .getAlignment()
8951                           .alignmentOfArrayElement(ElementSize));
8952   // Privatize the declared variable of mapper to be the current array element.
8953   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
8954   Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
8955     return MapperCGF
8956         .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
8957         .getAddress();
8958   });
8959   (void)Scope.Privatize();
8960 
8961   // Get map clause information. Fill up the arrays with all mapped variables.
8962   MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
8963   MappableExprsHandler::MapValuesArrayTy Pointers;
8964   MappableExprsHandler::MapValuesArrayTy Sizes;
8965   MappableExprsHandler::MapFlagsArrayTy MapTypes;
8966   MappableExprsHandler MEHandler(*D, MapperCGF);
8967   MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
8968 
8969   // Call the runtime API __tgt_mapper_num_components to get the number of
8970   // pre-existing components.
8971   llvm::Value *OffloadingArgs[] = {Handle};
8972   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
8973       createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
8974   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
8975       PreviousSize,
8976       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
8977 
8978   // Fill up the runtime mapper handle for all components.
8979   for (unsigned I = 0; I < BasePointers.size(); ++I) {
8980     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
8981         *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
8982     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
8983         Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
8984     llvm::Value *CurSizeArg = Sizes[I];
8985 
8986     // Extract the MEMBER_OF field from the map type.
8987     llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
8988     MapperCGF.EmitBlock(MemberBB);
8989     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
8990     llvm::Value *Member = MapperCGF.Builder.CreateAnd(
8991         OriMapType,
8992         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
8993     llvm::BasicBlock *MemberCombineBB =
8994         MapperCGF.createBasicBlock("omp.member.combine");
8995     llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
8996     llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
8997     MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
8998     // Add the number of pre-existing components to the MEMBER_OF field if it
8999     // is valid.
9000     MapperCGF.EmitBlock(MemberCombineBB);
9001     llvm::Value *CombinedMember =
9002         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9003     // Do nothing if it is not a member of previous components.
9004     MapperCGF.EmitBlock(TypeBB);
9005     llvm::PHINode *MemberMapType =
9006         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
9007     MemberMapType->addIncoming(OriMapType, MemberBB);
9008     MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
9009 
9010     // Combine the map type inherited from user-defined mapper with that
9011     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9012     // bits of the \a MapType, which is the input argument of the mapper
9013     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9014     // bits of MemberMapType.
9015     // [OpenMP 5.0], 1.2.6. map-type decay.
9016     //        | alloc |  to   | from  | tofrom | release | delete
9017     // ----------------------------------------------------------
9018     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9019     // to     | alloc |  to   | alloc |   to   | release | delete
9020     // from   | alloc | alloc | from  |  from  | release | delete
9021     // tofrom | alloc |  to   | from  | tofrom | release | delete
9022     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9023         MapType,
9024         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9025                                    MappableExprsHandler::OMP_MAP_FROM));
9026     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9027     llvm::BasicBlock *AllocElseBB =
9028         MapperCGF.createBasicBlock("omp.type.alloc.else");
9029     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9030     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9031     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9032     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9033     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9034     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9035     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9036     MapperCGF.EmitBlock(AllocBB);
9037     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9038         MemberMapType,
9039         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9040                                      MappableExprsHandler::OMP_MAP_FROM)));
9041     MapperCGF.Builder.CreateBr(EndBB);
9042     MapperCGF.EmitBlock(AllocElseBB);
9043     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9044         LeftToFrom,
9045         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9046     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9047     // In case of to, clear OMP_MAP_FROM.
9048     MapperCGF.EmitBlock(ToBB);
9049     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9050         MemberMapType,
9051         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9052     MapperCGF.Builder.CreateBr(EndBB);
9053     MapperCGF.EmitBlock(ToElseBB);
9054     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9055         LeftToFrom,
9056         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9057     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9058     // In case of from, clear OMP_MAP_TO.
9059     MapperCGF.EmitBlock(FromBB);
9060     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9061         MemberMapType,
9062         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9063     // In case of tofrom, do nothing.
9064     MapperCGF.EmitBlock(EndBB);
9065     llvm::PHINode *CurMapType =
9066         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9067     CurMapType->addIncoming(AllocMapType, AllocBB);
9068     CurMapType->addIncoming(ToMapType, ToBB);
9069     CurMapType->addIncoming(FromMapType, FromBB);
9070     CurMapType->addIncoming(MemberMapType, ToElseBB);
9071 
9072     // TODO: call the corresponding mapper function if a user-defined mapper is
9073     // associated with this map clause.
9074     // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9075     // data structure.
9076     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9077                                      CurSizeArg, CurMapType};
9078     MapperCGF.EmitRuntimeCall(
9079         createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
9080         OffloadingArgs);
9081   }
9082 
9083   // Update the pointer to point to the next element that needs to be mapped,
9084   // and check whether we have mapped all elements.
9085   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9086       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9087   PtrPHI->addIncoming(PtrNext, BodyBB);
9088   llvm::Value *IsDone =
9089       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9090   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9091   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9092 
9093   MapperCGF.EmitBlock(ExitBB);
9094   // Emit array deletion if this is an array section and \p MapType indicates
9095   // that deletion is required.
9096   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9097                              ElementSize, DoneBB, /*IsInit=*/false);
9098 
9099   // Emit the function exit block.
9100   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9101   MapperCGF.FinishFunction();
9102   UDMMap.try_emplace(D, Fn);
9103   if (CGF) {
9104     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9105     Decls.second.push_back(D);
9106   }
9107 }
9108 
9109 /// Emit the array initialization or deletion portion for user-defined mapper
9110 /// code generation. First, it evaluates whether an array section is mapped and
9111 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9112 /// true, and \a MapType indicates to not delete this array, array
9113 /// initialization code is generated. If \a IsInit is false, and \a MapType
9114 /// indicates to not this array, array deletion code is generated.
9115 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9116     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9117     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9118     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9119   StringRef Prefix = IsInit ? ".init" : ".del";
9120 
9121   // Evaluate if this is an array section.
9122   llvm::BasicBlock *IsDeleteBB =
9123       MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete");
9124   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix);
9125   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9126       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9127   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9128 
9129   // Evaluate if we are going to delete this section.
9130   MapperCGF.EmitBlock(IsDeleteBB);
9131   llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9132       MapType,
9133       MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9134   llvm::Value *DeleteCond;
9135   if (IsInit) {
9136     DeleteCond = MapperCGF.Builder.CreateIsNull(
9137         DeleteBit, "omp.array" + Prefix + ".delete");
9138   } else {
9139     DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9140         DeleteBit, "omp.array" + Prefix + ".delete");
9141   }
9142   MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9143 
9144   MapperCGF.EmitBlock(BodyBB);
9145   // Get the array size by multiplying element size and element number (i.e., \p
9146   // Size).
9147   llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9148       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9149   // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9150   // memory allocation/deletion purpose only.
9151   llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9152       MapType,
9153       MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9154                                    MappableExprsHandler::OMP_MAP_FROM)));
9155   // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9156   // data structure.
9157   llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9158   MapperCGF.EmitRuntimeCall(
9159       createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
9160 }
9161 
9162 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9163     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
9164     const llvm::function_ref<llvm::Value *(
9165         CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
9166   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9167   const OMPExecutableDirective *TD = &D;
9168   // Get nested teams distribute kind directive, if any.
9169   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9170     TD = getNestedDistributeDirective(CGM.getContext(), D);
9171   if (!TD)
9172     return;
9173   const auto *LD = cast<OMPLoopDirective>(TD);
9174   auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
9175                                                      PrePostActionTy &) {
9176     llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
9177 
9178     // Emit device ID if any.
9179     llvm::Value *DeviceID;
9180     if (Device)
9181       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9182                                            CGF.Int64Ty, /*isSigned=*/true);
9183     else
9184       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9185 
9186     llvm::Value *Args[] = {DeviceID, NumIterations};
9187     CGF.EmitRuntimeCall(
9188         createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
9189   };
9190   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9191 }
9192 
/// Emit the host-side code that launches the target region of directive \p D.
///
/// If \p OutlinedFnID is non-null, the emitted code fills the offloading
/// argument arrays, calls one of the __tgt_target* runtime entry points and,
/// when that call returns a non-null value, falls back to calling the host
/// version \p OutlinedFn. If \p OutlinedFnID is null, only the host call is
/// emitted. When both \p OutlinedFnID and \p IfCond are present, the choice
/// between the offloading path and the host-only path is emitted through
/// emitOMPIfClause.
///
/// \param CGF          Function being emitted; nothing is done if it has no
///                     insertion point.
/// \param D            The target directive being emitted.
/// \param OutlinedFn   Host version of the target region (must be non-null).
/// \param OutlinedFnID Value identifying the target region to the runtime, or
///                     null when offloading is not supported.
/// \param IfCond       Optional 'if' clause condition.
/// \param Device       Optional device expression; when null,
///                     OMP_DEVICEID_UNDEF is passed to the runtime.
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Function *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // With a 'depend' clause the region is emitted through
  // EmitOMPTargetTaskBasedDirective below instead of as an inlined directive.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Generate the captured variables once up front, inside an inlined region.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled in by TargetThenGen and read by
  // ThenGen; both lambdas capture them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // A 'nowait' clause selects the *_nowait variants of the runtime calls.
    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      // No teams information: use the __tgt_target entry points, which take
      // the same arguments minus the team and thread counts.
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    // A non-null return value from the runtime call is treated as failure.
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    // When an outer task is used, regenerate the captured variables with the
    // current CGF rather than reusing the values generated up front.
    // NOTE(review): presumably because this code is emitted inside the task's
    // outlined function - confirm against EmitOMPTargetTaskBasedDirective.
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    // Same regeneration of captured variables as in the offload-failed path.
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Offloading path: collect the map information for every capture, emit the
  // offloading arrays, publish them through InputInfo/MapTypesArray and then
  // run ThenGen (inside an outer task when required).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk the captures, the captured record's fields and the generated
    // captured values in lock-step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the emitted arrays to ThenGen through the shared state.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen, inside an outer task when required. The
  // task-based variant is given a default-constructed OMPTargetDataInfo.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9465 
9466 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9467                                                     StringRef ParentName) {
9468   if (!S)
9469     return;
9470 
9471   // Codegen OMP target directives that offload compute to the device.
9472   bool RequiresDeviceCodegen =
9473       isa<OMPExecutableDirective>(S) &&
9474       isOpenMPTargetExecutionDirective(
9475           cast<OMPExecutableDirective>(S)->getDirectiveKind());
9476 
9477   if (RequiresDeviceCodegen) {
9478     const auto &E = *cast<OMPExecutableDirective>(S);
9479     unsigned DeviceID;
9480     unsigned FileID;
9481     unsigned Line;
9482     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
9483                              FileID, Line);
9484 
9485     // Is this a target region that should not be emitted as an entry point? If
9486     // so just signal we are done with this target region.
9487     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
9488                                                             ParentName, Line))
9489       return;
9490 
9491     switch (E.getDirectiveKind()) {
9492     case OMPD_target:
9493       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9494                                                    cast<OMPTargetDirective>(E));
9495       break;
9496     case OMPD_target_parallel:
9497       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9498           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9499       break;
9500     case OMPD_target_teams:
9501       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9502           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9503       break;
9504     case OMPD_target_teams_distribute:
9505       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9506           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9507       break;
9508     case OMPD_target_teams_distribute_simd:
9509       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9510           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9511       break;
9512     case OMPD_target_parallel_for:
9513       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9514           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9515       break;
9516     case OMPD_target_parallel_for_simd:
9517       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9518           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9519       break;
9520     case OMPD_target_simd:
9521       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9522           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9523       break;
9524     case OMPD_target_teams_distribute_parallel_for:
9525       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9526           CGM, ParentName,
9527           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9528       break;
9529     case OMPD_target_teams_distribute_parallel_for_simd:
9530       CodeGenFunction::
9531           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9532               CGM, ParentName,
9533               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9534       break;
9535     case OMPD_parallel:
9536     case OMPD_for:
9537     case OMPD_parallel_for:
9538     case OMPD_parallel_sections:
9539     case OMPD_for_simd:
9540     case OMPD_parallel_for_simd:
9541     case OMPD_cancel:
9542     case OMPD_cancellation_point:
9543     case OMPD_ordered:
9544     case OMPD_threadprivate:
9545     case OMPD_allocate:
9546     case OMPD_task:
9547     case OMPD_simd:
9548     case OMPD_sections:
9549     case OMPD_section:
9550     case OMPD_single:
9551     case OMPD_master:
9552     case OMPD_critical:
9553     case OMPD_taskyield:
9554     case OMPD_barrier:
9555     case OMPD_taskwait:
9556     case OMPD_taskgroup:
9557     case OMPD_atomic:
9558     case OMPD_flush:
9559     case OMPD_teams:
9560     case OMPD_target_data:
9561     case OMPD_target_exit_data:
9562     case OMPD_target_enter_data:
9563     case OMPD_distribute:
9564     case OMPD_distribute_simd:
9565     case OMPD_distribute_parallel_for:
9566     case OMPD_distribute_parallel_for_simd:
9567     case OMPD_teams_distribute:
9568     case OMPD_teams_distribute_simd:
9569     case OMPD_teams_distribute_parallel_for:
9570     case OMPD_teams_distribute_parallel_for_simd:
9571     case OMPD_target_update:
9572     case OMPD_declare_simd:
9573     case OMPD_declare_target:
9574     case OMPD_end_declare_target:
9575     case OMPD_declare_reduction:
9576     case OMPD_declare_mapper:
9577     case OMPD_taskloop:
9578     case OMPD_taskloop_simd:
9579     case OMPD_requires:
9580     case OMPD_unknown:
9581       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9582     }
9583     return;
9584   }
9585 
9586   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9587     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9588       return;
9589 
9590     scanForTargetRegionsFunctions(
9591         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9592     return;
9593   }
9594 
9595   // If this is a lambda function, look into its body.
9596   if (const auto *L = dyn_cast<LambdaExpr>(S))
9597     S = L->getBody();
9598 
9599   // Keep looking for target regions recursively.
9600   for (const Stmt *II : S->children())
9601     scanForTargetRegionsFunctions(II, ParentName);
9602 }
9603 
9604 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9605   // If emitting code for the host, we do not process FD here. Instead we do
9606   // the normal code generation.
9607   if (!CGM.getLangOpts().OpenMPIsDevice)
9608     return false;
9609 
9610   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9611   StringRef Name = CGM.getMangledName(GD);
9612   // Try to detect target regions in the function.
9613   if (const auto *FD = dyn_cast<FunctionDecl>(VD))
9614     scanForTargetRegionsFunctions(FD->getBody(), Name);
9615 
9616   // Do not to emit function if it is not marked as declare target.
9617   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9618          AlreadyEmittedTargetFunctions.count(Name) == 0;
9619 }
9620 
9621 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9622   if (!CGM.getLangOpts().OpenMPIsDevice)
9623     return false;
9624 
9625   // Check if there are Ctors/Dtors in this declaration and look for target
9626   // regions in it. We use the complete variant to produce the kernel name
9627   // mangling.
9628   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9629   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9630     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9631       StringRef ParentName =
9632           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9633       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9634     }
9635     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9636       StringRef ParentName =
9637           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9638       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9639     }
9640   }
9641 
9642   // Do not to emit variable if it is not marked as declare target.
9643   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9644       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9645           cast<VarDecl>(GD.getDecl()));
9646   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9647       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9648        HasRequiresUnifiedSharedMemory)) {
9649     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9650     return true;
9651   }
9652   return false;
9653 }
9654 
9655 llvm::Constant *
9656 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9657                                                 const VarDecl *VD) {
9658   assert(VD->getType().isConstant(CGM.getContext()) &&
9659          "Expected constant variable.");
9660   StringRef VarName;
9661   llvm::Constant *Addr;
9662   llvm::GlobalValue::LinkageTypes Linkage;
9663   QualType Ty = VD->getType();
9664   SmallString<128> Buffer;
9665   {
9666     unsigned DeviceID;
9667     unsigned FileID;
9668     unsigned Line;
9669     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9670                              FileID, Line);
9671     llvm::raw_svector_ostream OS(Buffer);
9672     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9673        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9674     VarName = OS.str();
9675   }
9676   Linkage = llvm::GlobalValue::InternalLinkage;
9677   Addr =
9678       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9679                                   getDefaultFirstprivateAddressSpace());
9680   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9681   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9682   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9683   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9684       VarName, Addr, VarSize,
9685       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9686   return Addr;
9687 }
9688 
9689 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9690                                                    llvm::Constant *Addr) {
9691   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9692       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9693   if (!Res) {
9694     if (CGM.getLangOpts().OpenMPIsDevice) {
9695       // Register non-target variables being emitted in device code (debug info
9696       // may cause this).
9697       StringRef VarName = CGM.getMangledName(VD);
9698       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9699     }
9700     return;
9701   }
9702   // Register declare target variables.
9703   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9704   StringRef VarName;
9705   CharUnits VarSize;
9706   llvm::GlobalValue::LinkageTypes Linkage;
9707 
9708   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9709       !HasRequiresUnifiedSharedMemory) {
9710     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9711     VarName = CGM.getMangledName(VD);
9712     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9713       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9714       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9715     } else {
9716       VarSize = CharUnits::Zero();
9717     }
9718     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9719     // Temp solution to prevent optimizations of the internal variables.
9720     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9721       std::string RefName = getName({VarName, "ref"});
9722       if (!CGM.GetGlobalValue(RefName)) {
9723         llvm::Constant *AddrRef =
9724             getOrCreateInternalVariable(Addr->getType(), RefName);
9725         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9726         GVAddrRef->setConstant(/*Val=*/true);
9727         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9728         GVAddrRef->setInitializer(Addr);
9729         CGM.addCompilerUsedGlobal(GVAddrRef);
9730       }
9731     }
9732   } else {
9733     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9734             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9735              HasRequiresUnifiedSharedMemory)) &&
9736            "Declare target attribute must link or to with unified memory.");
9737     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9738       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9739     else
9740       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9741 
9742     if (CGM.getLangOpts().OpenMPIsDevice) {
9743       VarName = Addr->getName();
9744       Addr = nullptr;
9745     } else {
9746       VarName = getAddrOfDeclareTargetVar(VD).getName();
9747       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9748     }
9749     VarSize = CGM.getPointerSize();
9750     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9751   }
9752 
9753   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9754       VarName, Addr, VarSize, Flags, Linkage);
9755 }
9756 
9757 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9758   if (isa<FunctionDecl>(GD.getDecl()) ||
9759       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9760     return emitTargetFunctions(GD);
9761 
9762   return emitTargetGlobalVariable(GD);
9763 }
9764 
9765 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9766   for (const VarDecl *VD : DeferredGlobalVariables) {
9767     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9768         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9769     if (!Res)
9770       continue;
9771     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9772         !HasRequiresUnifiedSharedMemory) {
9773       CGM.EmitGlobal(VD);
9774     } else {
9775       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9776               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9777                HasRequiresUnifiedSharedMemory)) &&
9778              "Expected link clause or to clause with unified memory.");
9779       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9780     }
9781   }
9782 }
9783 
9784 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9785     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9786   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9787          " Expected target-based directive.");
9788 }
9789 
9790 void CGOpenMPRuntime::checkArchForUnifiedAddressing(
9791     const OMPRequiresDecl *D) {
9792   for (const OMPClause *Clause : D->clauselists()) {
9793     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9794       HasRequiresUnifiedSharedMemory = true;
9795       break;
9796     }
9797   }
9798 }
9799 
9800 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9801                                                        LangAS &AS) {
9802   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9803     return false;
9804   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9805   switch(A->getAllocatorType()) {
9806   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9807   // Not supported, fallback to the default mem space.
9808   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9809   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9810   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9811   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9812   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9813   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9814   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9815     AS = LangAS::Default;
9816     return true;
9817   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9818     llvm_unreachable("Expected predefined allocator for the variables with the "
9819                      "static storage.");
9820   }
9821   return false;
9822 }
9823 
/// Return the HasRequiresUnifiedSharedMemory flag, which
/// checkArchForUnifiedAddressing sets when a requires declaration carries a
/// unified_shared_memory clause.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
9827 
9828 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9829     CodeGenModule &CGM)
9830     : CGM(CGM) {
9831   if (CGM.getLangOpts().OpenMPIsDevice) {
9832     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9833     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9834   }
9835 }
9836 
9837 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9838   if (CGM.getLangOpts().OpenMPIsDevice)
9839     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9840 }
9841 
9842 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9843   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9844     return true;
9845 
9846   StringRef Name = CGM.getMangledName(GD);
9847   const auto *D = cast<FunctionDecl>(GD.getDecl());
9848   // Do not to emit function if it is marked as declare target as it was already
9849   // emitted.
9850   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9851     if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
9852       if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
9853         return !F->isDeclaration();
9854       return false;
9855     }
9856     return true;
9857   }
9858 
9859   return !AlreadyEmittedTargetFunctions.insert(Name).second;
9860 }
9861 
9862 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
9863   // If we don't have entries or if we are emitting code for the device, we
9864   // don't need to do anything.
9865   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
9866       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
9867       (OffloadEntriesInfoManager.empty() &&
9868        !HasEmittedDeclareTargetRegion &&
9869        !HasEmittedTargetRegion))
9870     return nullptr;
9871 
9872   // Create and register the function that handles the requires directives.
9873   ASTContext &C = CGM.getContext();
9874 
9875   llvm::Function *RequiresRegFn;
9876   {
9877     CodeGenFunction CGF(CGM);
9878     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
9879     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
9880     std::string ReqName = getName({"omp_offloading", "requires_reg"});
9881     RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
9882     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
9883     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
9884     // TODO: check for other requires clauses.
9885     // The requires directive takes effect only when a target region is
9886     // present in the compilation unit. Otherwise it is ignored and not
9887     // passed to the runtime. This avoids the runtime from throwing an error
9888     // for mismatching requires clauses across compilation units that don't
9889     // contain at least 1 target region.
9890     assert((HasEmittedTargetRegion ||
9891             HasEmittedDeclareTargetRegion ||
9892             !OffloadEntriesInfoManager.empty()) &&
9893            "Target or declare target region expected.");
9894     if (HasRequiresUnifiedSharedMemory)
9895       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
9896     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
9897         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
9898     CGF.FinishFunction();
9899   }
9900   return RequiresRegFn;
9901 }
9902 
9903 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
9904   // If we have offloading in the current module, we need to emit the entries
9905   // now and register the offloading descriptor.
9906   createOffloadEntriesAndInfoMetadata();
9907 
9908   // Create and register the offloading binary descriptors. This is the main
9909   // entity that captures all the information about offloading in the current
9910   // compilation unit.
9911   return createOffloadingBinaryDescriptorRegistration();
9912 }
9913 
9914 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9915                                     const OMPExecutableDirective &D,
9916                                     SourceLocation Loc,
9917                                     llvm::Function *OutlinedFn,
9918                                     ArrayRef<llvm::Value *> CapturedVars) {
9919   if (!CGF.HaveInsertPoint())
9920     return;
9921 
9922   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9923   CodeGenFunction::RunCleanupsScope Scope(CGF);
9924 
9925   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9926   llvm::Value *Args[] = {
9927       RTLoc,
9928       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9929       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9930   llvm::SmallVector<llvm::Value *, 16> RealArgs;
9931   RealArgs.append(std::begin(Args), std::end(Args));
9932   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9933 
9934   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9935   CGF.EmitRuntimeCall(RTLFn, RealArgs);
9936 }
9937 
9938 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9939                                          const Expr *NumTeams,
9940                                          const Expr *ThreadLimit,
9941                                          SourceLocation Loc) {
9942   if (!CGF.HaveInsertPoint())
9943     return;
9944 
9945   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9946 
9947   llvm::Value *NumTeamsVal =
9948       NumTeams
9949           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9950                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9951           : CGF.Builder.getInt32(0);
9952 
9953   llvm::Value *ThreadLimitVal =
9954       ThreadLimit
9955           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9956                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9957           : CGF.Builder.getInt32(0);
9958 
9959   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
9960   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9961                                      ThreadLimitVal};
9962   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
9963                       PushNumTeamsArgs);
9964 }
9965 
9966 void CGOpenMPRuntime::emitTargetDataCalls(
9967     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
9968     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
9969   if (!CGF.HaveInsertPoint())
9970     return;
9971 
9972   // Action used to replace the default codegen action and turn privatization
9973   // off.
9974   PrePostActionTy NoPrivAction;
9975 
9976   // Generate the code for the opening of the data environment. Capture all the
9977   // arguments of the runtime call by reference because they are used in the
9978   // closing of the region.
9979   auto &&BeginThenGen = [this, &D, Device, &Info,
9980                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
9981     // Fill up the arrays with all the mapped variables.
9982     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9983     MappableExprsHandler::MapValuesArrayTy Pointers;
9984     MappableExprsHandler::MapValuesArrayTy Sizes;
9985     MappableExprsHandler::MapFlagsArrayTy MapTypes;
9986 
9987     // Get map clause information.
9988     MappableExprsHandler MCHandler(D, CGF);
9989     MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
9990 
9991     // Fill up the arrays and create the arguments.
9992     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9993 
9994     llvm::Value *BasePointersArrayArg = nullptr;
9995     llvm::Value *PointersArrayArg = nullptr;
9996     llvm::Value *SizesArrayArg = nullptr;
9997     llvm::Value *MapTypesArrayArg = nullptr;
9998     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
9999                                  SizesArrayArg, MapTypesArrayArg, Info);
10000 
10001     // Emit device ID if any.
10002     llvm::Value *DeviceID = nullptr;
10003     if (Device) {
10004       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10005                                            CGF.Int64Ty, /*isSigned=*/true);
10006     } else {
10007       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10008     }
10009 
10010     // Emit the number of elements in the offloading arrays.
10011     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10012 
10013     llvm::Value *OffloadingArgs[] = {
10014         DeviceID,         PointerNum,    BasePointersArrayArg,
10015         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10016     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
10017                         OffloadingArgs);
10018 
10019     // If device pointer privatization is required, emit the body of the region
10020     // here. It will have to be duplicated: with and without privatization.
10021     if (!Info.CaptureDeviceAddrMap.empty())
10022       CodeGen(CGF);
10023   };
10024 
10025   // Generate code for the closing of the data region.
10026   auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
10027                                             PrePostActionTy &) {
10028     assert(Info.isValid() && "Invalid data environment closing arguments.");
10029 
10030     llvm::Value *BasePointersArrayArg = nullptr;
10031     llvm::Value *PointersArrayArg = nullptr;
10032     llvm::Value *SizesArrayArg = nullptr;
10033     llvm::Value *MapTypesArrayArg = nullptr;
10034     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10035                                  SizesArrayArg, MapTypesArrayArg, Info);
10036 
10037     // Emit device ID if any.
10038     llvm::Value *DeviceID = nullptr;
10039     if (Device) {
10040       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10041                                            CGF.Int64Ty, /*isSigned=*/true);
10042     } else {
10043       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10044     }
10045 
10046     // Emit the number of elements in the offloading arrays.
10047     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10048 
10049     llvm::Value *OffloadingArgs[] = {
10050         DeviceID,         PointerNum,    BasePointersArrayArg,
10051         PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10052     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
10053                         OffloadingArgs);
10054   };
10055 
10056   // If we need device pointer privatization, we need to emit the body of the
10057   // region with no privatization in the 'else' branch of the conditional.
10058   // Otherwise, we don't have to do anything.
10059   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
10060                                                          PrePostActionTy &) {
10061     if (!Info.CaptureDeviceAddrMap.empty()) {
10062       CodeGen.setAction(NoPrivAction);
10063       CodeGen(CGF);
10064     }
10065   };
10066 
10067   // We don't have to do anything to close the region if the if clause evaluates
10068   // to false.
10069   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
10070 
10071   if (IfCond) {
10072     emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
10073   } else {
10074     RegionCodeGenTy RCG(BeginThenGen);
10075     RCG(CGF);
10076   }
10077 
10078   // If we don't require privatization of device pointers, we emit the body in
10079   // between the runtime calls. This avoids duplicating the body code.
10080   if (Info.CaptureDeviceAddrMap.empty()) {
10081     CodeGen.setAction(NoPrivAction);
10082     CodeGen(CGF);
10083   }
10084 
10085   if (IfCond) {
10086     emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
10087   } else {
10088     RegionCodeGenTy RCG(EndThenGen);
10089     RCG(CGF);
10090   }
10091 }
10092 
10093 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10094     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10095     const Expr *Device) {
10096   if (!CGF.HaveInsertPoint())
10097     return;
10098 
10099   assert((isa<OMPTargetEnterDataDirective>(D) ||
10100           isa<OMPTargetExitDataDirective>(D) ||
10101           isa<OMPTargetUpdateDirective>(D)) &&
10102          "Expecting either target enter, exit data, or update directives.");
10103 
10104   CodeGenFunction::OMPTargetDataInfo InputInfo;
10105   llvm::Value *MapTypesArray = nullptr;
10106   // Generate the code for the opening of the data environment.
10107   auto &&ThenGen = [this, &D, Device, &InputInfo,
10108                     &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10109     // Emit device ID if any.
10110     llvm::Value *DeviceID = nullptr;
10111     if (Device) {
10112       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10113                                            CGF.Int64Ty, /*isSigned=*/true);
10114     } else {
10115       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10116     }
10117 
10118     // Emit the number of elements in the offloading arrays.
10119     llvm::Constant *PointerNum =
10120         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10121 
10122     llvm::Value *OffloadingArgs[] = {DeviceID,
10123                                      PointerNum,
10124                                      InputInfo.BasePointersArray.getPointer(),
10125                                      InputInfo.PointersArray.getPointer(),
10126                                      InputInfo.SizesArray.getPointer(),
10127                                      MapTypesArray};
10128 
10129     // Select the right runtime function call for each expected standalone
10130     // directive.
10131     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10132     OpenMPRTLFunction RTLFn;
10133     switch (D.getDirectiveKind()) {
10134     case OMPD_target_enter_data:
10135       RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
10136                         : OMPRTL__tgt_target_data_begin;
10137       break;
10138     case OMPD_target_exit_data:
10139       RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
10140                         : OMPRTL__tgt_target_data_end;
10141       break;
10142     case OMPD_target_update:
10143       RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
10144                         : OMPRTL__tgt_target_data_update;
10145       break;
10146     case OMPD_parallel:
10147     case OMPD_for:
10148     case OMPD_parallel_for:
10149     case OMPD_parallel_sections:
10150     case OMPD_for_simd:
10151     case OMPD_parallel_for_simd:
10152     case OMPD_cancel:
10153     case OMPD_cancellation_point:
10154     case OMPD_ordered:
10155     case OMPD_threadprivate:
10156     case OMPD_allocate:
10157     case OMPD_task:
10158     case OMPD_simd:
10159     case OMPD_sections:
10160     case OMPD_section:
10161     case OMPD_single:
10162     case OMPD_master:
10163     case OMPD_critical:
10164     case OMPD_taskyield:
10165     case OMPD_barrier:
10166     case OMPD_taskwait:
10167     case OMPD_taskgroup:
10168     case OMPD_atomic:
10169     case OMPD_flush:
10170     case OMPD_teams:
10171     case OMPD_target_data:
10172     case OMPD_distribute:
10173     case OMPD_distribute_simd:
10174     case OMPD_distribute_parallel_for:
10175     case OMPD_distribute_parallel_for_simd:
10176     case OMPD_teams_distribute:
10177     case OMPD_teams_distribute_simd:
10178     case OMPD_teams_distribute_parallel_for:
10179     case OMPD_teams_distribute_parallel_for_simd:
10180     case OMPD_declare_simd:
10181     case OMPD_declare_target:
10182     case OMPD_end_declare_target:
10183     case OMPD_declare_reduction:
10184     case OMPD_declare_mapper:
10185     case OMPD_taskloop:
10186     case OMPD_taskloop_simd:
10187     case OMPD_target:
10188     case OMPD_target_simd:
10189     case OMPD_target_teams_distribute:
10190     case OMPD_target_teams_distribute_simd:
10191     case OMPD_target_teams_distribute_parallel_for:
10192     case OMPD_target_teams_distribute_parallel_for_simd:
10193     case OMPD_target_teams:
10194     case OMPD_target_parallel:
10195     case OMPD_target_parallel_for:
10196     case OMPD_target_parallel_for_simd:
10197     case OMPD_requires:
10198     case OMPD_unknown:
10199       llvm_unreachable("Unexpected standalone target data directive.");
10200       break;
10201     }
10202     CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
10203   };
10204 
10205   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
10206                              CodeGenFunction &CGF, PrePostActionTy &) {
10207     // Fill up the arrays with all the mapped variables.
10208     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
10209     MappableExprsHandler::MapValuesArrayTy Pointers;
10210     MappableExprsHandler::MapValuesArrayTy Sizes;
10211     MappableExprsHandler::MapFlagsArrayTy MapTypes;
10212 
10213     // Get map clause information.
10214     MappableExprsHandler MEHandler(D, CGF);
10215     MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
10216 
10217     TargetDataInfo Info;
10218     // Fill up the arrays and create the arguments.
10219     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
10220     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
10221                                  Info.PointersArray, Info.SizesArray,
10222                                  Info.MapTypesArray, Info);
10223     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10224     InputInfo.BasePointersArray =
10225         Address(Info.BasePointersArray, CGM.getPointerAlign());
10226     InputInfo.PointersArray =
10227         Address(Info.PointersArray, CGM.getPointerAlign());
10228     InputInfo.SizesArray =
10229         Address(Info.SizesArray, CGM.getPointerAlign());
10230     MapTypesArray = Info.MapTypesArray;
10231     if (D.hasClausesOfKind<OMPDependClause>())
10232       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10233     else
10234       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10235   };
10236 
10237   if (IfCond) {
10238     emitOMPIfClause(CGF, IfCond, TargetThenGen,
10239                     [](CodeGenFunction &CGF, PrePostActionTy &) {});
10240   } else {
10241     RegionCodeGenTy ThenRCG(TargetThenGen);
10242     ThenRCG(CGF);
10243   }
10244 }
10245 
10246 namespace {
10247   /// Kind of parameter in a function with 'declare simd' directive.
10248   enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
10249   /// Attribute set of the parameter.
10250   struct ParamAttrTy {
10251     ParamKindTy Kind = Vector;
10252     llvm::APSInt StrideOrArg;
10253     llvm::APSInt Alignment;
10254   };
10255 } // namespace
10256 
10257 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10258                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10259   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10260   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10261   // of that clause. The VLEN value must be power of 2.
10262   // In other case the notion of the function`s "characteristic data type" (CDT)
10263   // is used to compute the vector length.
10264   // CDT is defined in the following order:
10265   //   a) For non-void function, the CDT is the return type.
10266   //   b) If the function has any non-uniform, non-linear parameters, then the
10267   //   CDT is the type of the first such parameter.
10268   //   c) If the CDT determined by a) or b) above is struct, union, or class
10269   //   type which is pass-by-value (except for the type that maps to the
10270   //   built-in complex data type), the characteristic data type is int.
10271   //   d) If none of the above three cases is applicable, the CDT is int.
10272   // The VLEN is then determined based on the CDT and the size of vector
10273   // register of that ISA for which current vector version is generated. The
10274   // VLEN is computed using the formula below:
10275   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10276   // where vector register size specified in section 3.2.1 Registers and the
10277   // Stack Frame of original AMD64 ABI document.
10278   QualType RetType = FD->getReturnType();
10279   if (RetType.isNull())
10280     return 0;
10281   ASTContext &C = FD->getASTContext();
10282   QualType CDT;
10283   if (!RetType.isNull() && !RetType->isVoidType()) {
10284     CDT = RetType;
10285   } else {
10286     unsigned Offset = 0;
10287     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10288       if (ParamAttrs[Offset].Kind == Vector)
10289         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10290       ++Offset;
10291     }
10292     if (CDT.isNull()) {
10293       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10294         if (ParamAttrs[I + Offset].Kind == Vector) {
10295           CDT = FD->getParamDecl(I)->getType();
10296           break;
10297         }
10298       }
10299     }
10300   }
10301   if (CDT.isNull())
10302     CDT = C.IntTy;
10303   CDT = CDT->getCanonicalTypeUnqualified();
10304   if (CDT->isRecordType() || CDT->isUnionType())
10305     CDT = C.IntTy;
10306   return C.getTypeSize(CDT);
10307 }
10308 
10309 static void
10310 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10311                            const llvm::APSInt &VLENVal,
10312                            ArrayRef<ParamAttrTy> ParamAttrs,
10313                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10314   struct ISADataTy {
10315     char ISA;
10316     unsigned VecRegSize;
10317   };
10318   ISADataTy ISAData[] = {
10319       {
10320           'b', 128
10321       }, // SSE
10322       {
10323           'c', 256
10324       }, // AVX
10325       {
10326           'd', 256
10327       }, // AVX2
10328       {
10329           'e', 512
10330       }, // AVX512
10331   };
10332   llvm::SmallVector<char, 2> Masked;
10333   switch (State) {
10334   case OMPDeclareSimdDeclAttr::BS_Undefined:
10335     Masked.push_back('N');
10336     Masked.push_back('M');
10337     break;
10338   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10339     Masked.push_back('N');
10340     break;
10341   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10342     Masked.push_back('M');
10343     break;
10344   }
10345   for (char Mask : Masked) {
10346     for (const ISADataTy &Data : ISAData) {
10347       SmallString<256> Buffer;
10348       llvm::raw_svector_ostream Out(Buffer);
10349       Out << "_ZGV" << Data.ISA << Mask;
10350       if (!VLENVal) {
10351         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10352         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10353         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10354       } else {
10355         Out << VLENVal;
10356       }
10357       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10358         switch (ParamAttr.Kind){
10359         case LinearWithVarStride:
10360           Out << 's' << ParamAttr.StrideOrArg;
10361           break;
10362         case Linear:
10363           Out << 'l';
10364           if (!!ParamAttr.StrideOrArg)
10365             Out << ParamAttr.StrideOrArg;
10366           break;
10367         case Uniform:
10368           Out << 'u';
10369           break;
10370         case Vector:
10371           Out << 'v';
10372           break;
10373         }
10374         if (!!ParamAttr.Alignment)
10375           Out << 'a' << ParamAttr.Alignment;
10376       }
10377       Out << '_' << Fn->getName();
10378       Fn->addFnAttr(Out.str());
10379     }
10380   }
10381 }
10382 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10388 
10389 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10390 ///
10391 /// TODO: Need to implement the behavior for reference marked with a
10392 /// var or no linear modifiers (1.b in the section). For this, we
10393 /// need to extend ParamKindTy to support the linear modifiers.
10394 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10395   QT = QT.getCanonicalType();
10396 
10397   if (QT->isVoidType())
10398     return false;
10399 
10400   if (Kind == ParamKindTy::Uniform)
10401     return false;
10402 
10403   if (Kind == ParamKindTy::Linear)
10404     return false;
10405 
10406   // TODO: Handle linear references with modifiers
10407 
10408   if (Kind == ParamKindTy::LinearWithVarStride)
10409     return false;
10410 
10411   return true;
10412 }
10413 
10414 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10415 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10416   QT = QT.getCanonicalType();
10417   unsigned Size = C.getTypeSize(QT);
10418 
10419   // Only scalars and complex within 16 bytes wide set PVB to true.
10420   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10421     return false;
10422 
10423   if (QT->isFloatingType())
10424     return true;
10425 
10426   if (QT->isIntegerType())
10427     return true;
10428 
10429   if (QT->isPointerType())
10430     return true;
10431 
10432   // TODO: Add support for complex types (section 3.1.2, item 2).
10433 
10434   return false;
10435 }
10436 
10437 /// Computes the lane size (LS) of a return type or of an input parameter,
10438 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10439 /// TODO: Add support for references, section 3.2.1, item 1.
10440 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10441   if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10442     QualType PTy = QT.getCanonicalType()->getPointeeType();
10443     if (getAArch64PBV(PTy, C))
10444       return C.getTypeSize(PTy);
10445   }
10446   if (getAArch64PBV(QT, C))
10447     return C.getTypeSize(QT);
10448 
10449   return C.getTypeSize(C.getUIntPtrType());
10450 }
10451 
10452 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10453 // signature of the scalar function, as defined in 3.2.2 of the
10454 // AAVFABI.
10455 static std::tuple<unsigned, unsigned, bool>
10456 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10457   QualType RetType = FD->getReturnType().getCanonicalType();
10458 
10459   ASTContext &C = FD->getASTContext();
10460 
10461   bool OutputBecomesInput = false;
10462 
10463   llvm::SmallVector<unsigned, 8> Sizes;
10464   if (!RetType->isVoidType()) {
10465     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10466     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10467       OutputBecomesInput = true;
10468   }
10469   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10470     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10471     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10472   }
10473 
10474   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10475   // The LS of a function parameter / return value can only be a power
10476   // of 2, starting from 8 bits, up to 128.
10477   assert(std::all_of(Sizes.begin(), Sizes.end(),
10478                      [](unsigned Size) {
10479                        return Size == 8 || Size == 16 || Size == 32 ||
10480                               Size == 64 || Size == 128;
10481                      }) &&
10482          "Invalid size");
10483 
10484   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10485                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10486                          OutputBecomesInput);
10487 }
10488 
10489 /// Mangle the parameter part of the vector function name according to
10490 /// their OpenMP classification. The mangling function is defined in
10491 /// section 3.5 of the AAVFABI.
10492 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10493   SmallString<256> Buffer;
10494   llvm::raw_svector_ostream Out(Buffer);
10495   for (const auto &ParamAttr : ParamAttrs) {
10496     switch (ParamAttr.Kind) {
10497     case LinearWithVarStride:
10498       Out << "ls" << ParamAttr.StrideOrArg;
10499       break;
10500     case Linear:
10501       Out << 'l';
10502       // Don't print the step value if it is not present or if it is
10503       // equal to 1.
10504       if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
10505         Out << ParamAttr.StrideOrArg;
10506       break;
10507     case Uniform:
10508       Out << 'u';
10509       break;
10510     case Vector:
10511       Out << 'v';
10512       break;
10513     }
10514 
10515     if (!!ParamAttr.Alignment)
10516       Out << 'a' << ParamAttr.Alignment;
10517   }
10518 
10519   return Out.str();
10520 }
10521 
10522 // Function used to add the attribute. The parameter `VLEN` is
10523 // templated to allow the use of "x" when targeting scalable functions
10524 // for SVE.
10525 template <typename T>
10526 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10527                                  char ISA, StringRef ParSeq,
10528                                  StringRef MangledName, bool OutputBecomesInput,
10529                                  llvm::Function *Fn) {
10530   SmallString<256> Buffer;
10531   llvm::raw_svector_ostream Out(Buffer);
10532   Out << Prefix << ISA << LMask << VLEN;
10533   if (OutputBecomesInput)
10534     Out << "v";
10535   Out << ParSeq << "_" << MangledName;
10536   Fn->addFnAttr(Out.str());
10537 }
10538 
10539 // Helper function to generate the Advanced SIMD names depending on
10540 // the value of the NDS when simdlen is not present.
10541 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10542                                       StringRef Prefix, char ISA,
10543                                       StringRef ParSeq, StringRef MangledName,
10544                                       bool OutputBecomesInput,
10545                                       llvm::Function *Fn) {
10546   switch (NDS) {
10547   case 8:
10548     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10549                          OutputBecomesInput, Fn);
10550     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10551                          OutputBecomesInput, Fn);
10552     break;
10553   case 16:
10554     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10555                          OutputBecomesInput, Fn);
10556     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10557                          OutputBecomesInput, Fn);
10558     break;
10559   case 32:
10560     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10561                          OutputBecomesInput, Fn);
10562     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10563                          OutputBecomesInput, Fn);
10564     break;
10565   case 64:
10566   case 128:
10567     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10568                          OutputBecomesInput, Fn);
10569     break;
10570   default:
10571     llvm_unreachable("Scalar type is too wide.");
10572   }
10573 }
10574 
10575 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10576 static void emitAArch64DeclareSimdFunction(
10577     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10578     ArrayRef<ParamAttrTy> ParamAttrs,
10579     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10580     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10581 
10582   // Get basic data for building the vector signature.
10583   const auto Data = getNDSWDS(FD, ParamAttrs);
10584   const unsigned NDS = std::get<0>(Data);
10585   const unsigned WDS = std::get<1>(Data);
10586   const bool OutputBecomesInput = std::get<2>(Data);
10587 
10588   // Check the values provided via `simdlen` by the user.
10589   // 1. A `simdlen(1)` doesn't produce vector signatures,
10590   if (UserVLEN == 1) {
10591     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10592         DiagnosticsEngine::Warning,
10593         "The clause simdlen(1) has no effect when targeting aarch64.");
10594     CGM.getDiags().Report(SLoc, DiagID);
10595     return;
10596   }
10597 
10598   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10599   // Advanced SIMD output.
10600   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10601     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10602         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10603                                     "power of 2 when targeting Advanced SIMD.");
10604     CGM.getDiags().Report(SLoc, DiagID);
10605     return;
10606   }
10607 
10608   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10609   // limits.
10610   if (ISA == 's' && UserVLEN != 0) {
10611     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10612       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10613           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10614                                       "lanes in the architectural constraints "
10615                                       "for SVE (min is 128-bit, max is "
10616                                       "2048-bit, by steps of 128-bit)");
10617       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10618       return;
10619     }
10620   }
10621 
10622   // Sort out parameter sequence.
10623   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10624   StringRef Prefix = "_ZGV";
10625   // Generate simdlen from user input (if any).
10626   if (UserVLEN) {
10627     if (ISA == 's') {
10628       // SVE generates only a masked function.
10629       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10630                            OutputBecomesInput, Fn);
10631     } else {
10632       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10633       // Advanced SIMD generates one or two functions, depending on
10634       // the `[not]inbranch` clause.
10635       switch (State) {
10636       case OMPDeclareSimdDeclAttr::BS_Undefined:
10637         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10638                              OutputBecomesInput, Fn);
10639         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10640                              OutputBecomesInput, Fn);
10641         break;
10642       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10643         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10644                              OutputBecomesInput, Fn);
10645         break;
10646       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10647         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10648                              OutputBecomesInput, Fn);
10649         break;
10650       }
10651     }
10652   } else {
10653     // If no user simdlen is provided, follow the AAVFABI rules for
10654     // generating the vector length.
10655     if (ISA == 's') {
10656       // SVE, section 3.4.1, item 1.
10657       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10658                            OutputBecomesInput, Fn);
10659     } else {
10660       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10661       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10662       // two vector names depending on the use of the clause
10663       // `[not]inbranch`.
10664       switch (State) {
10665       case OMPDeclareSimdDeclAttr::BS_Undefined:
10666         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10667                                   OutputBecomesInput, Fn);
10668         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10669                                   OutputBecomesInput, Fn);
10670         break;
10671       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10672         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10673                                   OutputBecomesInput, Fn);
10674         break;
10675       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10676         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10677                                   OutputBecomesInput, Fn);
10678         break;
10679       }
10680     }
10681   }
10682 }
10683 
10684 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10685                                               llvm::Function *Fn) {
10686   ASTContext &C = CGM.getContext();
10687   FD = FD->getMostRecentDecl();
10688   // Map params to their positions in function decl.
10689   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10690   if (isa<CXXMethodDecl>(FD))
10691     ParamPositions.try_emplace(FD, 0);
10692   unsigned ParamPos = ParamPositions.size();
10693   for (const ParmVarDecl *P : FD->parameters()) {
10694     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10695     ++ParamPos;
10696   }
10697   while (FD) {
10698     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10699       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10700       // Mark uniform parameters.
10701       for (const Expr *E : Attr->uniforms()) {
10702         E = E->IgnoreParenImpCasts();
10703         unsigned Pos;
10704         if (isa<CXXThisExpr>(E)) {
10705           Pos = ParamPositions[FD];
10706         } else {
10707           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10708                                 ->getCanonicalDecl();
10709           Pos = ParamPositions[PVD];
10710         }
10711         ParamAttrs[Pos].Kind = Uniform;
10712       }
10713       // Get alignment info.
10714       auto NI = Attr->alignments_begin();
10715       for (const Expr *E : Attr->aligneds()) {
10716         E = E->IgnoreParenImpCasts();
10717         unsigned Pos;
10718         QualType ParmTy;
10719         if (isa<CXXThisExpr>(E)) {
10720           Pos = ParamPositions[FD];
10721           ParmTy = E->getType();
10722         } else {
10723           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10724                                 ->getCanonicalDecl();
10725           Pos = ParamPositions[PVD];
10726           ParmTy = PVD->getType();
10727         }
10728         ParamAttrs[Pos].Alignment =
10729             (*NI)
10730                 ? (*NI)->EvaluateKnownConstInt(C)
10731                 : llvm::APSInt::getUnsigned(
10732                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10733                           .getQuantity());
10734         ++NI;
10735       }
10736       // Mark linear parameters.
10737       auto SI = Attr->steps_begin();
10738       auto MI = Attr->modifiers_begin();
10739       for (const Expr *E : Attr->linears()) {
10740         E = E->IgnoreParenImpCasts();
10741         unsigned Pos;
10742         if (isa<CXXThisExpr>(E)) {
10743           Pos = ParamPositions[FD];
10744         } else {
10745           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10746                                 ->getCanonicalDecl();
10747           Pos = ParamPositions[PVD];
10748         }
10749         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10750         ParamAttr.Kind = Linear;
10751         if (*SI) {
10752           Expr::EvalResult Result;
10753           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10754             if (const auto *DRE =
10755                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10756               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10757                 ParamAttr.Kind = LinearWithVarStride;
10758                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10759                     ParamPositions[StridePVD->getCanonicalDecl()]);
10760               }
10761             }
10762           } else {
10763             ParamAttr.StrideOrArg = Result.Val.getInt();
10764           }
10765         }
10766         ++SI;
10767         ++MI;
10768       }
10769       llvm::APSInt VLENVal;
10770       SourceLocation ExprLoc;
10771       const Expr *VLENExpr = Attr->getSimdlen();
10772       if (VLENExpr) {
10773         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10774         ExprLoc = VLENExpr->getExprLoc();
10775       }
10776       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10777       if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
10778           CGM.getTriple().getArch() == llvm::Triple::x86_64) {
10779         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10780       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10781         unsigned VLEN = VLENVal.getExtValue();
10782         StringRef MangledName = Fn->getName();
10783         if (CGM.getTarget().hasFeature("sve"))
10784           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10785                                          MangledName, 's', 128, Fn, ExprLoc);
10786         if (CGM.getTarget().hasFeature("neon"))
10787           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10788                                          MangledName, 'n', 128, Fn, ExprLoc);
10789       }
10790     }
10791     FD = FD->getPreviousDecl();
10792   }
10793 }
10794 
10795 namespace {
10796 /// Cleanup action for doacross support.
10797 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10798 public:
10799   static const int DoacrossFinArgs = 2;
10800 
10801 private:
10802   llvm::FunctionCallee RTLFn;
10803   llvm::Value *Args[DoacrossFinArgs];
10804 
10805 public:
10806   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10807                     ArrayRef<llvm::Value *> CallArgs)
10808       : RTLFn(RTLFn) {
10809     assert(CallArgs.size() == DoacrossFinArgs);
10810     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10811   }
10812   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10813     if (!CGF.HaveInsertPoint())
10814       return;
10815     CGF.EmitRuntimeCall(RTLFn, Args);
10816   }
10817 };
10818 } // namespace
10819 
10820 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
10821                                        const OMPLoopDirective &D,
10822                                        ArrayRef<Expr *> NumIterations) {
10823   if (!CGF.HaveInsertPoint())
10824     return;
10825 
10826   ASTContext &C = CGM.getContext();
10827   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
10828   RecordDecl *RD;
10829   if (KmpDimTy.isNull()) {
10830     // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
10831     //  kmp_int64 lo; // lower
10832     //  kmp_int64 up; // upper
10833     //  kmp_int64 st; // stride
10834     // };
10835     RD = C.buildImplicitRecord("kmp_dim");
10836     RD->startDefinition();
10837     addFieldToRecordDecl(C, RD, Int64Ty);
10838     addFieldToRecordDecl(C, RD, Int64Ty);
10839     addFieldToRecordDecl(C, RD, Int64Ty);
10840     RD->completeDefinition();
10841     KmpDimTy = C.getRecordType(RD);
10842   } else {
10843     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
10844   }
10845   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
10846   QualType ArrayTy =
10847       C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);
10848 
10849   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
10850   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
10851   enum { LowerFD = 0, UpperFD, StrideFD };
10852   // Fill dims with data.
10853   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
10854     LValue DimsLVal = CGF.MakeAddrLValue(
10855         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
10856     // dims.upper = num_iterations;
10857     LValue UpperLVal = CGF.EmitLValueForField(
10858         DimsLVal, *std::next(RD->field_begin(), UpperFD));
10859     llvm::Value *NumIterVal =
10860         CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
10861                                  D.getNumIterations()->getType(), Int64Ty,
10862                                  D.getNumIterations()->getExprLoc());
10863     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
10864     // dims.stride = 1;
10865     LValue StrideLVal = CGF.EmitLValueForField(
10866         DimsLVal, *std::next(RD->field_begin(), StrideFD));
10867     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
10868                           StrideLVal);
10869   }
10870 
10871   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
10872   // kmp_int32 num_dims, struct kmp_dim * dims);
10873   llvm::Value *Args[] = {
10874       emitUpdateLocation(CGF, D.getBeginLoc()),
10875       getThreadID(CGF, D.getBeginLoc()),
10876       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
10877       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
10878           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
10879           CGM.VoidPtrTy)};
10880 
10881   llvm::FunctionCallee RTLFn =
10882       createRuntimeFunction(OMPRTL__kmpc_doacross_init);
10883   CGF.EmitRuntimeCall(RTLFn, Args);
10884   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
10885       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
10886   llvm::FunctionCallee FiniRTLFn =
10887       createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
10888   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
10889                                              llvm::makeArrayRef(FiniArgs));
10890 }
10891 
10892 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
10893                                           const OMPDependClause *C) {
10894   QualType Int64Ty =
10895       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
10896   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10897   QualType ArrayTy = CGM.getContext().getConstantArrayType(
10898       Int64Ty, Size, ArrayType::Normal, 0);
10899   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10900   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10901     const Expr *CounterVal = C->getLoopData(I);
10902     assert(CounterVal);
10903     llvm::Value *CntVal = CGF.EmitScalarConversion(
10904         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10905         CounterVal->getExprLoc());
10906     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
10907                           /*Volatile=*/false, Int64Ty);
10908   }
10909   llvm::Value *Args[] = {
10910       emitUpdateLocation(CGF, C->getBeginLoc()),
10911       getThreadID(CGF, C->getBeginLoc()),
10912       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
10913   llvm::FunctionCallee RTLFn;
10914   if (C->getDependencyKind() == OMPC_DEPEND_source) {
10915     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
10916   } else {
10917     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
10918     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
10919   }
10920   CGF.EmitRuntimeCall(RTLFn, Args);
10921 }
10922 
10923 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
10924                                llvm::FunctionCallee Callee,
10925                                ArrayRef<llvm::Value *> Args) const {
10926   assert(Loc.isValid() && "Outlined function call location must be valid.");
10927   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
10928 
10929   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
10930     if (Fn->doesNotThrow()) {
10931       CGF.EmitNounwindRuntimeCall(Fn, Args);
10932       return;
10933     }
10934   }
10935   CGF.EmitRuntimeCall(Callee, Args);
10936 }
10937 
// Emit a call to the outlined function \p OutlinedFn. This implementation
// forwards all of its arguments unchanged to emitCall().
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
10943 
10944 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
10945   if (const auto *FD = dyn_cast<FunctionDecl>(D))
10946     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
10947       HasEmittedDeclareTargetRegion = true;
10948 }
10949 
// Returns the address of the local variable \p NativeParam as known to
// \p CGF. \p TargetParam is not consulted by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
10955 
10956 namespace {
10957 /// Cleanup action for allocate support.
10958 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
10959 public:
10960   static const int CleanupArgs = 3;
10961 
10962 private:
10963   llvm::FunctionCallee RTLFn;
10964   llvm::Value *Args[CleanupArgs];
10965 
10966 public:
10967   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
10968                        ArrayRef<llvm::Value *> CallArgs)
10969       : RTLFn(RTLFn) {
10970     assert(CallArgs.size() == CleanupArgs &&
10971            "Size of arguments does not match.");
10972     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10973   }
10974   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10975     if (!CGF.HaveInsertPoint())
10976       return;
10977     CGF.EmitRuntimeCall(RTLFn, Args);
10978   }
10979 };
10980 } // namespace
10981 
10982 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
10983                                                    const VarDecl *VD) {
10984   if (!VD)
10985     return Address::invalid();
10986   const VarDecl *CVD = VD->getCanonicalDecl();
10987   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
10988     return Address::invalid();
10989   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
10990   // Use the default allocation.
10991   if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
10992       !AA->getAllocator())
10993     return Address::invalid();
10994   llvm::Value *Size;
10995   CharUnits Align = CGM.getContext().getDeclAlign(CVD);
10996   if (CVD->getType()->isVariablyModifiedType()) {
10997     Size = CGF.getTypeSize(CVD->getType());
10998     // Align the size: ((size + align - 1) / align) * align
10999     Size = CGF.Builder.CreateNUWAdd(
11000         Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11001     Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11002     Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11003   } else {
11004     CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11005     Size = CGM.getSize(Sz.alignTo(Align));
11006   }
11007   llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11008   assert(AA->getAllocator() &&
11009          "Expected allocator expression for non-default allocator.");
11010   llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
11011   // According to the standard, the original allocator type is a enum (integer).
11012   // Convert to pointer type, if required.
11013   if (Allocator->getType()->isIntegerTy())
11014     Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
11015   else if (Allocator->getType()->isPointerTy())
11016     Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
11017                                                                 CGM.VoidPtrTy);
11018   llvm::Value *Args[] = {ThreadID, Size, Allocator};
11019 
11020   llvm::Value *Addr =
11021       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
11022                           CVD->getName() + ".void.addr");
11023   llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
11024                                                               Allocator};
11025   llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
11026 
11027   CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11028                                                 llvm::makeArrayRef(FiniArgs));
11029   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11030       Addr,
11031       CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
11032       CVD->getName() + ".addr");
11033   return Address(Addr, Align);
11034 }
11035 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11041 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11047 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11055 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11063 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11070 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11076 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11081 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11087 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11095 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11102 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11110 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11117 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11123 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11129 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11136 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11142 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11150 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11156 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11162 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11169 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11175 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11180 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11186 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11195 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11202 
// SIMD-only runtime: asserts that Options.SimpleReduction is set and then
// delegates to the base CGOpenMPRuntime::emitReduction with the same
// arguments.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
11211 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11217 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11224 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11231 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11236 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11242 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11248 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11255 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         llvm::Function *OutlinedFn,
                                         llvm::Value *OutlinedFnID,
                                         const Expr *IfCond,
                                         const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11264 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11268 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11272 
// SIMD-only runtime: unconditionally returns false; \p GD is not inspected.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
11276 
// SIMD-only runtime: no function is created here; always returns nullptr.
llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
  return nullptr;
}
11280 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11288 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11295 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11301 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11307 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11313 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11318 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11324 
// SIMD-only runtime stub: this hook is unsupported, so every call ends in
// llvm_unreachable.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11331